{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "==================================================\n",
      "                      IN\n",
      "==================================================\n",
      "\n",
      "Input text: Hello, I am\n",
      "Encoded input text: [15496, 11, 314, 716]\n",
      "encoded_tensor: tensor([[15496,    11,   314,   716]])\n",
      "\n",
      "\n",
      "==================================================\n",
      "                      OUT\n",
      "==================================================\n",
      "\n",
      "Output: tensor([[15496,    11,   314,   716, 27018, 24086, 47843, 30961, 42348,  7267,\n",
      "         49706, 43231, 47062, 34657]])\n",
      "Output length: 14\n",
      "Output text: Hello, I am Featureiman Byeswickattribute argue logger Normandy Compton analogous\n"
     ]
    }
   ],
   "source": [
    "import tiktoken\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "from torch.utils.data import Dataset, DataLoader\n",
    "\n",
    "class GPTDatasetV1(Dataset):\n",
    "\n",
    "    def __init__(self, txt, tokenizer, max_length, stride):\n",
    "        self.input_ids = []\n",
    "        self.target_ids = []\n",
    "\n",
    "        #tokenize the entire text\n",
    "        token_ids = tokenizer.encode(txt, allowed_special={\"<|endoftext|\"})\n",
    "\n",
    "        # use a sliding window to chunk the book into overlapping sequences of max_length\n",
    "        for i in range(0, len(token_ids)-max_length, stride):\n",
    "            input_chunk = token_ids[i:i+max_length]\n",
    "            target_chunk = token_ids[i+1:i+max_length+1]\n",
    "            self.input_ids.append(torch.tensor(input_chunk))\n",
    "            self.target_ids.append(torch.tensor(target_chunk))\n",
    "\n",
    "    def __len__(self):\n",
    "        return len(self.input_ids)\n",
    "        \n",
    "    def __getitem__(self, idx):\n",
    "        return self.input_ids[idx], self.target_ids[idx]\n",
    "        \n",
    "def create_dataloader_v1(txt, batch_size=4, max_length=256, stride=128, shuffle=True, drop_last=True, num_workers=0):\n",
    "    # initialize the tokenizer\n",
    "    tokenizer = tiktoken.get_encoding(\"gpt2\")\n",
    "    # create dataset\n",
    "    dataset = GPTDatasetV1(txt, tokenizer, max_length, stride)\n",
    "    # create dataloader\n",
    "    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers) \n",
    "    return dataloader\n",
    "\n",
    "\n",
     "class MultiHeadAttention(nn.Module):\n",
     "    \"\"\"Multi-head causal self-attention with an output projection.\"\"\"\n",
     "\n",
     "    def __init__(self, d_in, d_out, context_length, dropout, num_heads, qkv_bias=False):\n",
     "        super().__init__()\n",
     "        # d_out must divide evenly across heads\n",
     "        assert d_out % num_heads == 0,\"d_out 需要被 num_heads 整除\"\n",
     "        self.d_out = d_out\n",
     "        self.num_heads = num_heads\n",
     "        self.head_dim = d_out // num_heads\n",
     "\n",
     "        self.W_query = nn.Linear(d_in, d_out, bias = qkv_bias)\n",
     "        self.W_key = nn.Linear(d_in, d_out, bias = qkv_bias)\n",
     "        self.W_value = nn.Linear(d_in, d_out, bias = qkv_bias)\n",
     "        self.out_proj = nn.Linear(d_out, d_out)  #Linear layer to combine heads output\n",
     "        self.dropout = nn.Dropout(dropout)\n",
     "        # upper-triangular mask (1s strictly above the diagonal) marks future positions;\n",
     "        # register_buffer moves it with the module's device without making it a parameter\n",
     "        self.register_buffer('mask', torch.triu(torch.ones(context_length, context_length), diagonal=1))\n",
     "\n",
     "    def forward(self, x):\n",
     "        \"\"\"x: (b, num_tokens, d_in) -> context vectors of shape (b, num_tokens, d_out).\"\"\"\n",
     "        b, num_tokens, d_in = x.shape\n",
     "\n",
     "        keys = self.W_key(x)    #shape:(b,num_tokens,d_out)\n",
     "        queries = self.W_query(x)\n",
     "        values = self.W_value(x)\n",
     "\n",
     "        # we implicitly split the matrix by adding a 'num_heads' dim\n",
     "        # unroll last dim:(b,num_tokens,d_out)-->(b,num_tokens,num_heads,head_dim)\n",
     "        keys = keys.view(b, num_tokens, self.num_heads, self.head_dim)\n",
     "        values = values.view(b, num_tokens, self.num_heads, self.head_dim)\n",
     "        queries = queries.view(b, num_tokens, self.num_heads, self.head_dim)\n",
     "\n",
     "        # transpose:(b,num_tokens,num_heads,head_dim)->(b,num_heads,num_tokens,head_dim)\n",
     "        keys = keys.transpose(1,2)\n",
     "        queries = queries.transpose(1,2)\n",
     "        values = values.transpose(1,2)\n",
     "\n",
     "        # compute scaled dot-product attention(self-attention) with a causal mask\n",
     "        attn_scores = queries @ keys.transpose(2,3)  # Dot product for each head\n",
     "        # original mask truncated to the number of tokens and converted to bool\n",
     "        mask_bool = self.mask.bool()[:num_tokens, :num_tokens]\n",
     "        # fill future-position scores with -inf so softmax assigns them zero weight\n",
     "        attn_scores.masked_fill_(mask_bool, -torch.inf)\n",
     "\n",
     "        # scale by sqrt(head_dim) before the softmax\n",
     "        attn_weights = torch.softmax(attn_scores/keys.shape[-1]**0.5,dim=-1)\n",
     "        attn_weights = self.dropout(attn_weights)\n",
     "\n",
     "        # shape：(b,num_tokens, num_heads,head_dim)\n",
     "        context_vec = (attn_weights@values).transpose(1,2)\n",
     "\n",
     "        # combine heads,where self.d_out = self.num_heads*self.head_dim\n",
     "        context_vec = context_vec.reshape(b,num_tokens,self.d_out)\n",
     "        context_vec = self.out_proj(context_vec)   # optional projection\n",
     "\n",
     "        return context_vec\n",
    "    \n",
    "class LayerNorm(nn.Module):\n",
    "    def __init__(self, emb_dim):\n",
    "        super().__init__()\n",
    "        self.eps = 1e-5\n",
    "        self.scale = nn.Parameter(torch.ones(emb_dim))\n",
    "        self.shift = nn.Parameter(torch.zeros(emb_dim))\n",
    "\n",
    "    def forward(self, x):\n",
    "        mean = x.mean(dim=-1,keepdim=True)\n",
    "        var = x.var(dim=-1,keepdim=True,unbiased=False)\n",
    "        norm_x = (x-mean)/torch.sqrt(var+self.eps)\n",
    "        return self.scale*norm_x+self.shift\n",
    "    \n",
    "class GELU(nn.Module):\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "\n",
    "    def forward(self, x):\n",
    "        return 0.5 * x * (1 + torch.tanh(\n",
    "            torch.sqrt(torch.tensor(2.0 / torch.pi)) *\n",
    "            (x + 0.044715 * torch.pow(x, 3))\n",
    "        ))\n",
    "    \n",
    "class FeedForward(nn.Module):\n",
    "    def __init__(self, cfg):\n",
    "        super().__init__()\n",
    "        self.layers = nn.Sequential(\n",
    "            nn.Linear(cfg[\"emb_dim\"], 4*cfg[\"emb_dim\"]),\n",
    "            GELU(),\n",
    "            nn.Linear(4*cfg[\"emb_dim\"], cfg[\"emb_dim\"]),\n",
    "        )\n",
    "\n",
    "    def forward(self, x):\n",
    "        return self.layers(x)\n",
    "    \n",
     "class TransformerBlock(nn.Module):\n",
     "    \"\"\"Pre-LayerNorm transformer block: attention then feed-forward, each with a residual shortcut.\"\"\"\n",
     "    def __init__(self, cfg):\n",
     "        super().__init__()\n",
     "        self.att = MultiHeadAttention(\n",
     "            d_in=cfg[\"emb_dim\"],\n",
     "            d_out=cfg[\"emb_dim\"],\n",
     "            context_length=cfg[\"context_length\"],\n",
     "            num_heads=cfg[\"n_heads\"],\n",
     "            dropout=cfg[\"drop_rate\"],\n",
     "            qkv_bias=cfg[\"qkv_bias\"]\n",
     "        )\n",
     "        self.ff = FeedForward(cfg)\n",
     "        self.norm1 = LayerNorm(cfg[\"emb_dim\"])\n",
     "        self.norm2 = LayerNorm(cfg[\"emb_dim\"])\n",
     "        # one dropout module reused for both residual branches\n",
     "        self.drop_shortcut = nn.Dropout(cfg[\"drop_rate\"])\n",
     "\n",
     "    def forward(self, x):\n",
     "        # shortcut for attention block (norm applied BEFORE the sub-layer: pre-LN)\n",
     "        shortcut = x\n",
     "        x = self.norm1(x)\n",
     "        x = self.att(x)   # Shape [batch_size, num_tokens, emb_size]\n",
     "        x = self.drop_shortcut(x)\n",
     "        x = x+shortcut  # Add the original input back\n",
     "        # shortcut for feed-forward block\n",
     "        shortcut = x\n",
     "        x = self.norm2(x)\n",
     "        x = self.ff(x)\n",
     "        x = self.drop_shortcut(x)\n",
     "        x = x+shortcut\n",
     "\n",
     "        return x\n",
    "    \n",
     "class GPTModel(nn.Module):\n",
     "    \"\"\"GPT: token + positional embeddings, a stack of transformer blocks, final norm, vocab-size head.\"\"\"\n",
     "    def __init__(self, cfg):\n",
     "        super().__init__()\n",
     "        self.tok_emb = nn.Embedding(cfg[\"vocab_size\"], cfg[\"emb_dim\"])\n",
     "        self.pos_emb = nn.Embedding(cfg[\"context_length\"], cfg[\"emb_dim\"])\n",
     "        self.drop_emb = nn.Dropout(cfg[\"drop_rate\"])\n",
     "\n",
     "        self.trf_blocks = nn.Sequential(\n",
     "            *[TransformerBlock(cfg) for _ in range(cfg[\"n_layers\"])]\n",
     "        )\n",
     "\n",
     "        self.final_norm = LayerNorm(cfg[\"emb_dim\"])\n",
     "        # linear head maps final embeddings to per-token vocabulary logits\n",
     "        self.out_head = nn.Linear(cfg[\"emb_dim\"], cfg[\"vocab_size\"], bias=False)\n",
     "\n",
     "    def forward(self, in_idx):\n",
     "        \"\"\"in_idx: (batch, seq_len) token ids -> logits of shape (batch, seq_len, vocab_size).\"\"\"\n",
     "        batch_size, seq_len = in_idx.shape\n",
     "        tok_embeds = self.tok_emb(in_idx)\n",
     "        # position embeddings for indices 0..seq_len-1, broadcast over the batch\n",
     "        pos_embeds = self.pos_emb(torch.arange(seq_len, device=in_idx.device))\n",
     "        x = tok_embeds+pos_embeds   # Shape [batch_size, num_tokens, emb_size]\n",
     "        x = self.drop_emb(x)\n",
     "        x = self.trf_blocks(x)\n",
     "        x = self.final_norm(x)\n",
     "        logits = self.out_head(x)\n",
     "        return logits\n",
    "    \n",
     "def generate_text_simple(model, idx, max_new_tokens, context_size):\n",
     "    \"\"\"Greedily append max_new_tokens token ids to idx using the model's next-token logits.\"\"\"\n",
     "    # idx is (B, T) array of indices in the current context\n",
     "    for _ in range(max_new_tokens):\n",
     "        # crop current context if it exceeds the supported context size\n",
     "        # eg, if LLM supports only 5 tokens, and the context size is 10\n",
     "        # then only the last 5 tokens are used as context\n",
     "        idx_cond = idx[:, -context_size:]\n",
     "        # Get the predictions (no gradients needed for generation)\n",
     "        with torch.no_grad():\n",
     "            logits = model(idx_cond)\n",
     "\n",
     "        # focus only on the last time step\n",
     "        # (batch, n_token, vocab_size) ->(batch, vocab_size)\n",
     "        logits = logits[:, -1, :]\n",
     "        #Get the idx of the vocab entry with the highest logits value\n",
     "        idx_next = torch.argmax(logits, dim=-1, keepdim=True)  #shape:(batch,1)\n",
     "        #append sampled index to the running sequence\n",
     "        idx = torch.cat((idx, idx_next), dim=1)  #shape: (batch, n_token+1)\n",
     "    return idx\n",
    "\n",
     "if __name__ == \"__main__\":\n",
     "    # GPT-2 (124M) hyperparameters\n",
     "    GPT_CONFIG_124M ={\n",
     "        \"vocab_size\": 50257,     # Vocabulary size\n",
     "        \"context_length\": 1024,  # Context length\n",
     "        \"emb_dim\": 768,          # Embedding dimension\n",
     "        \"n_heads\": 12,           # Number of attention heads\n",
     "        \"n_layers\": 12,          # Number of layers\n",
     "        \"drop_rate\": 0.1,        # Dropout rate\n",
     "        \"qkv_bias\": False        # Query-Key-Value bias\n",
     "    } \n",
     "\n",
     "    # fixed seed so the random weights are reproducible\n",
     "    torch.manual_seed(123)\n",
     "    model = GPTModel(GPT_CONFIG_124M)\n",
     "    model.eval()  # disable dropout\n",
     "    \n",
     "    start_context = \"Hello, I am\"\n",
     "\n",
     "    tokenizer = tiktoken.get_encoding(\"gpt2\")\n",
     "    encoded = tokenizer.encode(start_context)\n",
     "    encoded_tensor = torch.tensor(encoded).unsqueeze(0)  # add batch dimension\n",
     "\n",
     "    print(f\"\\n{50*'='}\\n{22*' '}IN\\n{50*'='}\")\n",
     "    print(\"\\nInput text:\", start_context)\n",
     "    print(\"Encoded input text:\", encoded)\n",
     "    print(\"encoded_tensor:\", encoded_tensor)\n",
     "\n",
     "    # greedily generate 10 tokens from the (untrained) model\n",
     "    out = generate_text_simple(model=model, idx=encoded_tensor, max_new_tokens=10,context_size=GPT_CONFIG_124M[\"context_length\"])\n",
     "    decoded_text = tokenizer.decode(out.squeeze(0).tolist())\n",
     "\n",
     "    print(f\"\\n\\n{50*'='}\\n{22*' '}OUT\\n{50*'='}\")\n",
     "    print(\"\\nOutput:\", out)\n",
     "    print(\"Output length:\", len(out[0]))\n",
     "    print(\"Output text:\", decoded_text)\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "matplotlib version: 3.9.0\n",
      "numpy version: 1.26.4\n",
      "tiktoken version: 0.7.0\n",
      "torch version: 2.2.0\n",
      "tensorflow version: 2.16.2\n"
     ]
    }
   ],
   "source": [
     "from importlib.metadata import version\n",
     "\n",
     "# report installed versions of the key dependencies for reproducibility\n",
     "pkgs = [\n",
     "    \"matplotlib\",\n",
     "    \"numpy\",\n",
     "    \"tiktoken\",\n",
     "    \"torch\",\n",
     "    \"tensorflow\"\n",
     "]\n",
     "for p in pkgs:\n",
     "    print(f\"{p} version: {version(p)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GPTModel(\n",
       "  (tok_emb): Embedding(50257, 768)\n",
       "  (pos_emb): Embedding(256, 768)\n",
       "  (drop_emb): Dropout(p=0.1, inplace=False)\n",
       "  (trf_blocks): Sequential(\n",
       "    (0): TransformerBlock(\n",
       "      (att): MultiHeadAttention(\n",
       "        (W_query): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (W_key): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (W_value): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (out_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "        (dropout): Dropout(p=0.1, inplace=False)\n",
       "      )\n",
       "      (ff): FeedForward(\n",
       "        (layers): Sequential(\n",
       "          (0): Linear(in_features=768, out_features=3072, bias=True)\n",
       "          (1): GELU()\n",
       "          (2): Linear(in_features=3072, out_features=768, bias=True)\n",
       "        )\n",
       "      )\n",
       "      (norm1): LayerNorm()\n",
       "      (norm2): LayerNorm()\n",
       "      (drop_shortcut): Dropout(p=0.1, inplace=False)\n",
       "    )\n",
       "    (1): TransformerBlock(\n",
       "      (att): MultiHeadAttention(\n",
       "        (W_query): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (W_key): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (W_value): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (out_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "        (dropout): Dropout(p=0.1, inplace=False)\n",
       "      )\n",
       "      (ff): FeedForward(\n",
       "        (layers): Sequential(\n",
       "          (0): Linear(in_features=768, out_features=3072, bias=True)\n",
       "          (1): GELU()\n",
       "          (2): Linear(in_features=3072, out_features=768, bias=True)\n",
       "        )\n",
       "      )\n",
       "      (norm1): LayerNorm()\n",
       "      (norm2): LayerNorm()\n",
       "      (drop_shortcut): Dropout(p=0.1, inplace=False)\n",
       "    )\n",
       "    (2): TransformerBlock(\n",
       "      (att): MultiHeadAttention(\n",
       "        (W_query): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (W_key): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (W_value): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (out_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "        (dropout): Dropout(p=0.1, inplace=False)\n",
       "      )\n",
       "      (ff): FeedForward(\n",
       "        (layers): Sequential(\n",
       "          (0): Linear(in_features=768, out_features=3072, bias=True)\n",
       "          (1): GELU()\n",
       "          (2): Linear(in_features=3072, out_features=768, bias=True)\n",
       "        )\n",
       "      )\n",
       "      (norm1): LayerNorm()\n",
       "      (norm2): LayerNorm()\n",
       "      (drop_shortcut): Dropout(p=0.1, inplace=False)\n",
       "    )\n",
       "    (3): TransformerBlock(\n",
       "      (att): MultiHeadAttention(\n",
       "        (W_query): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (W_key): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (W_value): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (out_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "        (dropout): Dropout(p=0.1, inplace=False)\n",
       "      )\n",
       "      (ff): FeedForward(\n",
       "        (layers): Sequential(\n",
       "          (0): Linear(in_features=768, out_features=3072, bias=True)\n",
       "          (1): GELU()\n",
       "          (2): Linear(in_features=3072, out_features=768, bias=True)\n",
       "        )\n",
       "      )\n",
       "      (norm1): LayerNorm()\n",
       "      (norm2): LayerNorm()\n",
       "      (drop_shortcut): Dropout(p=0.1, inplace=False)\n",
       "    )\n",
       "    (4): TransformerBlock(\n",
       "      (att): MultiHeadAttention(\n",
       "        (W_query): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (W_key): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (W_value): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (out_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "        (dropout): Dropout(p=0.1, inplace=False)\n",
       "      )\n",
       "      (ff): FeedForward(\n",
       "        (layers): Sequential(\n",
       "          (0): Linear(in_features=768, out_features=3072, bias=True)\n",
       "          (1): GELU()\n",
       "          (2): Linear(in_features=3072, out_features=768, bias=True)\n",
       "        )\n",
       "      )\n",
       "      (norm1): LayerNorm()\n",
       "      (norm2): LayerNorm()\n",
       "      (drop_shortcut): Dropout(p=0.1, inplace=False)\n",
       "    )\n",
       "    (5): TransformerBlock(\n",
       "      (att): MultiHeadAttention(\n",
       "        (W_query): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (W_key): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (W_value): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (out_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "        (dropout): Dropout(p=0.1, inplace=False)\n",
       "      )\n",
       "      (ff): FeedForward(\n",
       "        (layers): Sequential(\n",
       "          (0): Linear(in_features=768, out_features=3072, bias=True)\n",
       "          (1): GELU()\n",
       "          (2): Linear(in_features=3072, out_features=768, bias=True)\n",
       "        )\n",
       "      )\n",
       "      (norm1): LayerNorm()\n",
       "      (norm2): LayerNorm()\n",
       "      (drop_shortcut): Dropout(p=0.1, inplace=False)\n",
       "    )\n",
       "    (6): TransformerBlock(\n",
       "      (att): MultiHeadAttention(\n",
       "        (W_query): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (W_key): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (W_value): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (out_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "        (dropout): Dropout(p=0.1, inplace=False)\n",
       "      )\n",
       "      (ff): FeedForward(\n",
       "        (layers): Sequential(\n",
       "          (0): Linear(in_features=768, out_features=3072, bias=True)\n",
       "          (1): GELU()\n",
       "          (2): Linear(in_features=3072, out_features=768, bias=True)\n",
       "        )\n",
       "      )\n",
       "      (norm1): LayerNorm()\n",
       "      (norm2): LayerNorm()\n",
       "      (drop_shortcut): Dropout(p=0.1, inplace=False)\n",
       "    )\n",
       "    (7): TransformerBlock(\n",
       "      (att): MultiHeadAttention(\n",
       "        (W_query): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (W_key): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (W_value): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (out_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "        (dropout): Dropout(p=0.1, inplace=False)\n",
       "      )\n",
       "      (ff): FeedForward(\n",
       "        (layers): Sequential(\n",
       "          (0): Linear(in_features=768, out_features=3072, bias=True)\n",
       "          (1): GELU()\n",
       "          (2): Linear(in_features=3072, out_features=768, bias=True)\n",
       "        )\n",
       "      )\n",
       "      (norm1): LayerNorm()\n",
       "      (norm2): LayerNorm()\n",
       "      (drop_shortcut): Dropout(p=0.1, inplace=False)\n",
       "    )\n",
       "    (8): TransformerBlock(\n",
       "      (att): MultiHeadAttention(\n",
       "        (W_query): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (W_key): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (W_value): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (out_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "        (dropout): Dropout(p=0.1, inplace=False)\n",
       "      )\n",
       "      (ff): FeedForward(\n",
       "        (layers): Sequential(\n",
       "          (0): Linear(in_features=768, out_features=3072, bias=True)\n",
       "          (1): GELU()\n",
       "          (2): Linear(in_features=3072, out_features=768, bias=True)\n",
       "        )\n",
       "      )\n",
       "      (norm1): LayerNorm()\n",
       "      (norm2): LayerNorm()\n",
       "      (drop_shortcut): Dropout(p=0.1, inplace=False)\n",
       "    )\n",
       "    (9): TransformerBlock(\n",
       "      (att): MultiHeadAttention(\n",
       "        (W_query): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (W_key): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (W_value): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (out_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "        (dropout): Dropout(p=0.1, inplace=False)\n",
       "      )\n",
       "      (ff): FeedForward(\n",
       "        (layers): Sequential(\n",
       "          (0): Linear(in_features=768, out_features=3072, bias=True)\n",
       "          (1): GELU()\n",
       "          (2): Linear(in_features=3072, out_features=768, bias=True)\n",
       "        )\n",
       "      )\n",
       "      (norm1): LayerNorm()\n",
       "      (norm2): LayerNorm()\n",
       "      (drop_shortcut): Dropout(p=0.1, inplace=False)\n",
       "    )\n",
       "    (10): TransformerBlock(\n",
       "      (att): MultiHeadAttention(\n",
       "        (W_query): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (W_key): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (W_value): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (out_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "        (dropout): Dropout(p=0.1, inplace=False)\n",
       "      )\n",
       "      (ff): FeedForward(\n",
       "        (layers): Sequential(\n",
       "          (0): Linear(in_features=768, out_features=3072, bias=True)\n",
       "          (1): GELU()\n",
       "          (2): Linear(in_features=3072, out_features=768, bias=True)\n",
       "        )\n",
       "      )\n",
       "      (norm1): LayerNorm()\n",
       "      (norm2): LayerNorm()\n",
       "      (drop_shortcut): Dropout(p=0.1, inplace=False)\n",
       "    )\n",
       "    (11): TransformerBlock(\n",
       "      (att): MultiHeadAttention(\n",
       "        (W_query): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (W_key): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (W_value): Linear(in_features=768, out_features=768, bias=False)\n",
       "        (out_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "        (dropout): Dropout(p=0.1, inplace=False)\n",
       "      )\n",
       "      (ff): FeedForward(\n",
       "        (layers): Sequential(\n",
       "          (0): Linear(in_features=768, out_features=3072, bias=True)\n",
       "          (1): GELU()\n",
       "          (2): Linear(in_features=3072, out_features=768, bias=True)\n",
       "        )\n",
       "      )\n",
       "      (norm1): LayerNorm()\n",
       "      (norm2): LayerNorm()\n",
       "      (drop_shortcut): Dropout(p=0.1, inplace=False)\n",
       "    )\n",
       "  )\n",
       "  (final_norm): LayerNorm()\n",
       "  (out_head): Linear(in_features=768, out_features=50257, bias=False)\n",
       ")"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
     "import torch\n",
     "\n",
     "# same 124M-parameter config as before, but with a shorter context window\n",
     "GPT_CONFIG_124M = {\n",
     "    \"vocab_size\": 50257,   # Vocabulary size\n",
     "    \"context_length\": 256, # Shortened context length (orig: 1024)\n",
     "    \"emb_dim\": 768,        # Embedding dimension\n",
     "    \"n_heads\": 12,         # Number of attention heads\n",
     "    \"n_layers\": 12,        # Number of layers\n",
     "    \"drop_rate\": 0.1,      # Dropout rate\n",
     "    \"qkv_bias\": False      # Query-key-value bias\n",
     "}\n",
     "\n",
     "# fixed seed so the random weights are reproducible\n",
     "torch.manual_seed(123)\n",
     "model = GPTModel(GPT_CONFIG_124M)\n",
     "model.eval()   # disable dropout during inference"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "output text:\n",
      " Every effort moves you rentingetic wasnم refres RexMeCHicular stren\n"
     ]
    }
   ],
   "source": [
    "import tiktoken\n",
    "\n",
    "def text_to_token_ids(text, tokenizer):\n",
    "    encoded = tokenizer.encode(text, allowed_special={'<|endoftext|'})\n",
    "    encoded_tensor = torch.tensor(encoded).unsqueeze(0)  # add batch dimention\n",
    "    return encoded_tensor\n",
    "\n",
     "def token_ids_to_text(token_ids, tokenizer):\n",
     "    \"\"\"Decode a (1, n) tensor of token ids back to text.\"\"\"\n",
     "    flat = token_ids.squeeze(0)  # remove batch dimension\n",
     "    return tokenizer.decode(flat.tolist())\n",
    "\n",
     "start_context = \"Every effort moves you\"\n",
     "tokenizer = tiktoken.get_encoding(\"gpt2\")\n",
     "\n",
     "# greedy generation from the untrained model (output is expected to be gibberish)\n",
     "token_ids = generate_text_simple(\n",
     "    model = model,\n",
     "    idx = text_to_token_ids(start_context, tokenizer),\n",
     "    max_new_tokens = 10,\n",
     "    context_size=GPT_CONFIG_124M[\"context_length\"]\n",
     ")\n",
     "\n",
     "print(\"output text:\\n\", token_ids_to_text(token_ids, tokenizer))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
     "# toy batch: token ids for two 3-token sequences and their next-token targets\n",
     "inputs = torch.tensor([[16833, 3626, 6100],   # [\"every effort moves\",\n",
     "                       [40,    1107, 588]])   #  \"I really like\"]\n",
     "\n",
     "targets = torch.tensor([[3626, 6100, 345  ],  # [\" effort moves you\",\n",
     "                        [1107,  588, 11311]]) #  \" really like chocolate\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[[ 0.1113, -0.1057, -0.3666,  ...,  0.2843, -0.8824,  0.1074],\n",
      "         [-0.6109, -0.5167, -0.7613,  ...,  0.5450, -1.0319, -0.2175],\n",
      "         [ 0.5707, -0.6459, -0.0701,  ...,  0.7419, -0.1806, -0.2217]],\n",
      "\n",
      "        [[-0.2968,  0.1949, -0.1649,  ..., -0.4867,  0.7218, -0.1714],\n",
      "         [-0.8375,  0.0612, -0.4641,  ...,  0.2327, -0.3889, -0.0770],\n",
      "         [ 0.5614,  0.6919,  0.8915,  ..., -0.9472,  1.2411, -0.2056]]])\n",
      "\n",
      "\n",
      "==================================================\n",
      "                      OUT\n",
      "==================================================\n",
      "tensor([[[1.8849e-05, 1.5172e-05, 1.1687e-05,  ..., 2.2409e-05,\n",
      "          6.9776e-06, 1.8776e-05],\n",
      "         [9.1569e-06, 1.0062e-05, 7.8786e-06,  ..., 2.9090e-05,\n",
      "          6.0103e-06, 1.3571e-05],\n",
      "         [2.9877e-05, 8.8507e-06, 1.5741e-05,  ..., 3.5456e-05,\n",
      "          1.4094e-05, 1.3526e-05]],\n",
      "\n",
      "        [[1.2561e-05, 2.0538e-05, 1.4332e-05,  ..., 1.0389e-05,\n",
      "          3.4784e-05, 1.4239e-05],\n",
      "         [7.2731e-06, 1.7864e-05, 1.0565e-05,  ..., 2.1206e-05,\n",
      "          1.1390e-05, 1.5559e-05],\n",
      "         [2.9496e-05, 3.3605e-05, 4.1029e-05,  ..., 6.5249e-06,\n",
      "          5.8203e-05, 1.3698e-05]]])\n"
     ]
    }
   ],
   "source": [
     "# forward pass without gradient tracking, then convert logits to probabilities\n",
     "with torch.no_grad():\n",
     "    logits = model(inputs)\n",
     "print(logits)\n",
     "print(f\"\\n\\n{50*'='}\\n{22*' '}OUT\\n{50*'='}\")\n",
     "probas = torch.softmax(logits, dim=-1)\n",
     "print(probas)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "token ids:\n",
      " tensor([[[16657],\n",
      "         [  339],\n",
      "         [42826]],\n",
      "\n",
      "        [[49906],\n",
      "         [29669],\n",
      "         [41751]]])\n"
     ]
    }
   ],
   "source": [
     "# greedy decoding: pick the highest-probability token id at each position\n",
     "token_ids = torch.argmax(probas, dim=-1, keepdim=True)\n",
     "print(\"token ids:\\n\", token_ids)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Targets batch 1:  effort moves you\n",
      "Outputs batch 1:  Armed heNetflix\n"
     ]
    }
   ],
   "source": [
     "# compare the target text with the (untrained) model's greedy predictions\n",
     "print(f\"Targets batch 1: {token_ids_to_text(targets[0], tokenizer)}\")\n",
     "print(f\"Outputs batch 1: {token_ids_to_text(token_ids[0].flatten(), tokenizer)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Text 1: tensor([7.4541e-05, 3.1061e-05, 1.1563e-05])\n",
      "text 2: tensor([1.0337e-05, 5.6776e-05, 4.7559e-06])\n"
     ]
    }
   ],
   "source": [
     "# probabilities the model assigns to the correct (target) token at each position,\n",
     "# selected via fancy indexing: probas[batch, position, target_id]\n",
     "text_idx = 0\n",
     "target_probas_1 = probas[text_idx, [0,1,2], targets[text_idx]]\n",
     "print(\"Text 1:\", target_probas_1)\n",
     "\n",
     "text_idx = 1\n",
     "target_probas_2 = probas[text_idx, [0,1,2], targets[text_idx]]\n",
     "print(\"text 2:\", target_probas_2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([ -9.5042, -10.3796, -11.3677, -11.4798,  -9.7764, -12.2561])\n"
     ]
    }
   ],
   "source": [
     "# log-probabilities of the target tokens (cross entropy works in log space)\n",
     "log_probas = torch.log(torch.cat((target_probas_1, target_probas_2)))\n",
     "print(log_probas)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor(-10.7940)\n"
     ]
    }
   ],
   "source": [
     "# average the log-probabilities over all positions\n",
     "avg_log_probas = torch.mean(log_probas)\n",
     "print(avg_log_probas)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor(10.7940)\n"
     ]
    }
   ],
   "source": [
    "neg_avg_log_probas = avg_log_probas*-1\n",
    "print(neg_avg_log_probas)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "logits shape: torch.Size([2, 3, 50257])\n",
      "targets shape: torch.Size([2, 3])\n"
     ]
    }
   ],
   "source": [
    "print(\"logits shape:\", logits.shape)\n",
    "\n",
    "print(\"targets shape:\", targets.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "flattened logits torch.Size([6, 50257])\n",
      "flattened targets torch.Size([6])\n"
     ]
    }
   ],
   "source": [
    "logits_flat = logits.flatten(0,1)\n",
    "targets_flat = targets.flatten()\n",
    "\n",
    "print(\"flattened logits\", logits_flat.shape)\n",
    "print(\"flattened targets\", targets_flat.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor(10.7940)\n"
     ]
    }
   ],
   "source": [
    "loss = torch.nn.functional.cross_entropy(logits_flat, targets_flat)\n",
    "print(loss)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor(48725.8203)\n"
     ]
    }
   ],
   "source": [
    "perplexity = torch.exp(loss)\n",
    "print(perplexity)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "I HAD always thought Jack Gisburn rather a cheap genius--though a good fellow enough--so it was no \n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import urllib.request\n",
    "\n",
    "file_path = \"the-verdict.txt\"\n",
    "url = \"https://raw.githubusercontent.com/rasbt/LLMs-from-scratch/main/ch02/01_main-chapter-code/the-verdict.txt\"\n",
    "\n",
    "with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
    "    text_data = file.read()\n",
    "\n",
    "print(text_data[:99])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "it for me! The Strouds stand alone, and happen once--but there's no exterminating our kind of art.\"\n"
     ]
    }
   ],
   "source": [
    "print(text_data[-99:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "characters: 20479\n",
      "tokens: 5145\n"
     ]
    }
   ],
   "source": [
    "total_characters = len(text_data)\n",
    "total_tokens = len(tokenizer.encode(text_data))\n",
    "\n",
    "print(\"characters:\", total_characters)\n",
    "\n",
    "print(\"tokens:\", total_tokens)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# train/validation ratio\n",
    "train_ratio = 0.90\n",
    "split_idx = int(train_ratio*len(text_data))\n",
    "train_data = text_data[:split_idx]\n",
    "val_data = text_data[split_idx:]\n",
    "\n",
    "torch.manual_seed(123)\n",
    "\n",
    "train_loader = create_dataloader_v1(\n",
    "    train_data,\n",
    "    batch_size=2,\n",
    "    max_length=GPT_CONFIG_124M[\"context_length\"],\n",
    "    stride=GPT_CONFIG_124M[\"context_length\"],\n",
    "    drop_last=True,\n",
    "    shuffle=True,\n",
    "    num_workers=0\n",
    ")\n",
    "\n",
    "val_loader = create_dataloader_v1(\n",
    "    val_data,\n",
    "    batch_size=2,\n",
    "    max_length=GPT_CONFIG_124M[\"context_length\"],\n",
    "    stride=GPT_CONFIG_124M[\"context_length\"],\n",
    "    drop_last=False,\n",
    "    shuffle=False,\n",
    "    num_workers=0\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "if total_tokens*(train_ratio)<GPT_CONFIG_124M[\"context_length\"]:\n",
    "    print(\"Not enough tokens for training loader\")\n",
    "\n",
    "if total_tokens*(1-train_ratio)<GPT_CONFIG_124M[\"context_length\"]:\n",
    "    print(\"Not enough tokens for the validation loader\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "train loader:\n",
      "torch.Size([2, 256]) torch.Size([2, 256])\n",
      "torch.Size([2, 256]) torch.Size([2, 256])\n",
      "torch.Size([2, 256]) torch.Size([2, 256])\n",
      "torch.Size([2, 256]) torch.Size([2, 256])\n",
      "torch.Size([2, 256]) torch.Size([2, 256])\n",
      "torch.Size([2, 256]) torch.Size([2, 256])\n",
      "torch.Size([2, 256]) torch.Size([2, 256])\n",
      "torch.Size([2, 256]) torch.Size([2, 256])\n",
      "torch.Size([2, 256]) torch.Size([2, 256])\n",
      "\n",
      " validation loader:\n",
      "torch.Size([2, 256]) torch.Size([2, 256])\n"
     ]
    }
   ],
   "source": [
    "print(\"train loader:\")\n",
    "for x,y in train_loader:\n",
    "    print(x.shape, y.shape)\n",
    "\n",
    "print(\"\\n validation loader:\")\n",
    "for x,y in val_loader:\n",
    "    print(x.shape, y.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training tokens: 4608\n",
      "val tokens: 512\n",
      "all tokens: 5120\n"
     ]
    }
   ],
   "source": [
    "train_tokens = 0\n",
    "for input_batch, target_batch in train_loader:\n",
    "    train_tokens += input_batch.numel()\n",
    "\n",
    "val_tokens = 0\n",
    "for input_batch, target_batch in val_loader:\n",
    "    val_tokens += input_batch.numel()\n",
    "\n",
    "print(\"Training tokens:\", train_tokens)\n",
    "print(\"val tokens:\", val_tokens)\n",
    "print(\"all tokens:\", train_tokens+val_tokens)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "def calc_loss_batch(input_batch, target_batch, model, device):\n",
    "    input_batch, target_batch = input_batch.to(device), target_batch.to(device)\n",
    "    logits = model(input_batch)\n",
    "    loss = torch.nn.functional.cross_entropy(logits.flatten(0,1), target_batch.flatten())\n",
    "    return loss\n",
    "\n",
    "def calc_loss_loader(data_loader, model, device, num_batches=None):\n",
    "    total_loss = 0.\n",
    "    if len(data_loader) == 0:\n",
    "        return float(\"nan\")\n",
    "    elif num_batches is None:\n",
    "        num_batches = len(data_loader)\n",
    "    else:\n",
    "        # reduce the number of batches to match the total number of batches in the data\n",
    "        # if num_batches exceeds the number of batches in the data loader\n",
    "        num_batches = min(num_batches, len(data_loader))\n",
    "    for i, (input_batch, target_batch) in enumerate(data_loader):\n",
    "        if i<num_batches:\n",
    "            loss = calc_loss_batch(input_batch, target_batch, model, device)\n",
    "            total_loss += loss.item()\n",
    "        else:\n",
    "            break\n",
    "    return total_loss/num_batches"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training loss: 10.98758347829183\n",
      "val loss: 10.98110580444336\n"
     ]
    }
   ],
   "source": [
    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
    "model.to(device) # no assgigment model = model.to(device) necessary for nn.Module class\n",
    "\n",
    "torch.manual_seed(123)\n",
    "\n",
    "with torch.no_grad(): # disable gradient tracking for efficiency because we are not training yet\n",
    "    train_loss = calc_loss_loader(train_loader, model, device)\n",
    "    val_loss = calc_loss_loader(val_loader, model, device)\n",
    "\n",
    "print(\"training loss:\", train_loss)\n",
    "print(\"val loss:\", val_loss)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "def train_model_simple(model, train_loader, val_loader, optimizer, device, num_epochs,\n",
    "                       eval_freq, eval_iter, start_context, tokenizer):\n",
    "    # init lists to track losses and tokens seen\n",
    "    train_losses, val_losses, track_tokens_seen = [], [], []\n",
    "    tokens_seen, global_step = 0, -1\n",
    "\n",
    "    # main training loop\n",
    "    for epoch in range(num_epochs):\n",
    "        model.train() # set model to train mode\n",
    "\n",
    "        for input_batch, target_batch in train_loader:\n",
    "            optimizer.zero_grad() # reset loss gradients from previous batch iter\n",
    "            loss = calc_loss_batch(input_batch, target_batch, model, device)\n",
    "            loss.backward() # calc loss gradient\n",
    "            optimizer.step() # update model weights using loss gradients\n",
    "            tokens_seen += input_batch.numel()\n",
    "            global_step += 1\n",
    "\n",
    "            # optional evaluation step\n",
    "            if global_step%eval_freq == 0:\n",
    "                train_loss, val_loss = evaluate_model(\n",
    "                    model, train_loader, val_loader, device, eval_iter\n",
    "                )\n",
    "                train_losses.append(train_loss)\n",
    "                val_losses.append(val_loss)\n",
    "                track_tokens_seen.append(tokens_seen)\n",
    "                print(f\" ep {epoch+1} (step {global_step:06d}): \"\n",
    "                      f\"train loss {train_loss:.3f}, val loss {val_loss:.3f}\")\n",
    "                \n",
    "        generate_and_print_sample(model, tokenizer, device, start_context)\n",
    "    return train_losses,val_losses, track_tokens_seen\n",
    "\n",
    "def evaluate_model(model, train_loader, val_loader, device, eval_iter):\n",
    "    model.eval()\n",
    "    with torch.no_grad():\n",
    "        train_loss = calc_loss_loader(train_loader, model, device, num_batches=eval_iter)\n",
    "        val_loss = calc_loss_loader(val_loader, model, device, num_batches=eval_iter)\n",
    "    model.train()\n",
    "    return train_loss, val_loss\n",
    "\n",
    "def generate_and_print_sample(model, tokenizer, device, start_context):\n",
    "    model.eval()\n",
    "    context_size = model.pos_emb.weight.shape[0]\n",
    "    encoded = text_to_token_ids(start_context, tokenizer).to(device)\n",
    "    with torch.no_grad():\n",
    "        token_ids = generate_text_simple(model=model, idx=encoded,max_new_tokens=50,context_size=context_size)\n",
    "        decoded_text = token_ids_to_text(token_ids, tokenizer)\n",
    "        print(decoded_text.replace(\"\\n\", \" \")) # compact print\n",
    "\n",
    "    model.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " ep 1 (step 000000): train loss 9.783, val loss 9.927\n",
      " ep 1 (step 000005): train loss 7.985, val loss 8.335\n",
      "Every effort moves you,,,,,,,,,,,,.                                     \n",
      " ep 2 (step 000010): train loss 6.753, val loss 7.048\n",
      " ep 2 (step 000015): train loss 6.114, val loss 6.573\n",
      "Every effort moves you, and,, and, and,,,,, and, and,,,,,,,,,,,,,, and,,,, and,, and,,,,, and,,,,,,\n",
      " ep 3 (step 000020): train loss 5.525, val loss 6.490\n",
      " ep 3 (step 000025): train loss 5.324, val loss 6.387\n",
      "Every effort moves you, and to the picture.                      \"I, and the of the of the's the honour, and, and I had been, and I\n",
      " ep 4 (step 000030): train loss 4.761, val loss 6.360\n",
      " ep 4 (step 000035): train loss 4.461, val loss 6.258\n",
      "Every effort moves you of the to the picture--as of the picture--as I had been \" it was his \" I was the     \"I was his I had been the his pictures--and it the picture and I had been the picture of\n",
      " ep 5 (step 000040): train loss 3.833, val loss 6.196\n",
      "Every effort moves you know the \"Oh, and he was not the fact by his last word.         \"I was.      \"Oh, I felt a little a little the    \n",
      " ep 6 (step 000045): train loss 3.352, val loss 6.139\n",
      " ep 6 (step 000050): train loss 2.861, val loss 6.112\n",
      "Every effort moves you know; and my dear, and he was not the fact with a little of the house of the fact of the fact, and.                       \n",
      " ep 7 (step 000055): train loss 2.347, val loss 6.138\n",
      " ep 7 (step 000060): train loss 2.084, val loss 6.179\n",
      "Every effort moves you know,\" was one of the picture for nothing--I told Mrs.  \"I looked--as of the fact, and I felt him--his back his head to the donkey. \"Oh, and_--because he had always _\n",
      " ep 8 (step 000065): train loss 1.521, val loss 6.176\n",
      " ep 8 (step 000070): train loss 1.272, val loss 6.178\n",
      "Every effort moves you?\" \"I didn't bear the picture--I told me.  \"I looked up, and went on groping and Mrs. I was back the head to look up at the honour being _mine_--because he was when I\n",
      " ep 9 (step 000075): train loss 1.000, val loss 6.277\n",
      " ep 9 (step 000080): train loss 0.718, val loss 6.281\n",
      "Every effort moves you?\"  \"Yes--quite insensible to the irony. She wanted him vindicated--and by me!\"  He laughed again, and threw back his head to look up at the sketch of the donkey. \"There were days when I\n",
      " ep 10 (step 000085): train loss 0.506, val loss 6.325\n",
      "Every effort moves you?\"  \"Yes--quite insensible to the irony. She wanted him vindicated--and by me!\"  He laughed again, and threw back his head to the donkey again. I saw that, and down the room, when I\n"
     ]
    }
   ],
   "source": [
    "torch.manual_seed(123)\n",
    "model = GPTModel(GPT_CONFIG_124M)\n",
    "model.to(device)\n",
    "optimizer = torch.optim.AdamW(model.parameters(), lr=0.0004, weight_decay=0.1)\n",
    "\n",
    "num_epochs = 10\n",
    "train_losses, val_losses, tokens_seen = train_model_simple(\n",
    "    model, train_loader, val_loader, optimizer,device,num_epochs=num_epochs,eval_freq=5,\n",
    "    eval_iter=5, start_context=\"Every effort moves you\", tokenizer=tokenizer\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeoAAAEiCAYAAAA21pHjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAABUBUlEQVR4nO3dd3hUxfrA8e+mbHonlRRaJKEHCBhiJ1JFQAFLVLALQUCuDQuCDUEuF0EuinrBnzRRioighi4IJJRQJPQ0ShIgvbf5/bFhk6UHEnYT3s/z7MPunDln3z0keXfmzJnRKKUUQgghhDBJZsYOQAghhBBXJolaCCGEMGGSqIUQQggTJolaCCGEMGGSqIUQQggTJolaCCGEMGGSqIUQQggTJolaCCGEMGGSqIUQQggTJolaiAYgMTERjUZDXFycsUMRQtQySdRCmAiNRnPVx4QJE4wdohDCCCyMHYAQQufMmTP65z/++CPjx4/n8OHD+jJ7e3tjhCWEMDJpUQthIry8vPQPJycnNBqN/rWHhwfTpk3D19cXKysrOnTowO+//37FY5WXl/Pcc88RFBREcnIyAL/88gsdO3bE2tqaZs2aMXHiRMrKyvT7aDQavv32WwYOHIitrS2BgYGsXLlSvz0zM5PIyEjc3d2xsbEhMDCQuXPnXjGGn3/+mbZt22JjY4ObmxsRERHk5+frt3/77bcEBwdjbW1NUFAQ//3vfw32T0lJYciQITg7O+Pq6kr//v1JTEzUbx82bBgDBgxg6tSpeHt74+bmRlRUFKWlpdd9zoWoF5QQwuTMnTtXOTk56V9PmzZNOTo6qkWLFqlDhw6pN998U1laWqojR44opZRKSEhQgNqzZ48qKipSAwcOVCEhISo9PV0ppdTmzZuVo6Ojmjdvnjp+/Lj6888/VZMmTdSECRP07wEoX19ftXDhQnX06FE1atQoZW9vr86fP6+UUioqKkp16NBBxcbGqoSEBBUdHa1Wrlx52fhPnz6tLCws1LRp01RCQoLat2+fmjVrlsrNzVVKKTV//nzl7e2tli5dqk6cOKGWLl2qXF1d1bx585RSSpWUlKjg4GD13HPPqX379qmDBw+qJ598UrVs2VIVFxcrpZQaOnSocnR0VK+88oqKj49Xv/76q7K1tVVz5syp3f8MIYxMErUQJujiRO3j46M++eQTgzqhoaFqxIgRSqmqRP3XX3+p7t27q7vuuktlZWXp63bv3l19+umnBvv/8MMPytvbW/8aUO+9957+dV5engLUmjVrlFJK9evXTz377LPXFf+uXbsUoBITEy+7vXnz5mrhwoUGZR999JEKCwvTx9ayZUtVUVGh315cXKxsbGzUH3/8oZTSJeqAgABVVlamrzN48GD12GOPXVeMQtQXco1aCBOXk5PD6dOnCQ8PNygPDw9n7969BmVPPPEEvr6+rF+/HhsbG3353r172bp1K5988om+rLy8nKKiIgoKCrC1tQWgXbt2+u12dnY4OjqSnp4OwPDhw3n00UfZvXs3PXr0YMCAAXTr1u2yMbdv357u3bvTtm1bevbsSY8ePRg0aBAuLi7k5+dz/Phxnn/+eV588UX9PmVlZTg5OenjPXbsGA4ODgbHLSoq4vjx4/rXrVu3xtzcXP/a29ub/fv3X+VsClH/SKIWogHp06cP8+fPZ9u2bTzwwAP68ry8PCZOnMgjjzxyyT7W1tb655aWlgbbNBoNFRUVAPTu3ZukpCRWr15NdHQ03bt3JyoqiqlTp15yTHNzc6Kjo/n777/5888/mTlzJu+++y47duzQfyn45ptv6Nq16yX7XYi3U6dOLFiw4JJju7u7X1e8QjQUkqiFMHGOjo74+PiwdetW7r33Xn351q1b6dKli0Hd4cOH06ZNGx5++GF+++03ff2OHTty+PBhWrRocVOxuLu7M3ToUIYOHcrdd9/NG2+8cdlEDbqkGR4eTnh4OOPHjycgIIDly5cz
duxYfHx8OHHiBJGRkZfdt2PHjvz44494eHjg6Oh4UzELUd9JohaiHnjjjTf44IMPaN68OR06dGDu3LnExcVdtsX56quvUl5ezkMPPcSaNWu46667GD9+PA899BD+/v4MGjQIMzMz9u7dy4EDB/j444+vK4bx48fTqVMnWrduTXFxMatWrSI4OPiydXfs2MG6devo0aMHHh4e7Nixg7Nnz+rrT5w4kVGjRuHk5ESvXr0oLi5m586dZGZmMnbsWCIjI/n888/p378/H374Ib6+viQlJbFs2TLefPNNfH19b/xkClHPSKIWoh4YNWoU2dnZ/Otf/yI9PZ1WrVqxcuVKAgMDL1t/zJgxVFRU0KdPH37//Xd69uzJqlWr+PDDD5k8eTKWlpYEBQXxwgsvXHcMWq2WcePGkZiYiI2NDXfffTeLFy++bF1HR0c2b97M9OnTycnJISAggH//+9/07t0bgBdeeAFbW1s+//xz3njjDezs7Gjbti1jxowBwNbWls2bN/PWW2/xyCOPkJubS+PGjenevbu0sMVtR6OUUsYOQgghhBCXJxOeCCGEECZMErUQQghhwiRRCyGEECZMErUQQghhwiRRCyGEECZMErUQQghhwiRRX8GsWbNo0qQJ1tbWdO3alZiYGGOHZBI2b95Mv3798PHxQaPRsGLFCoPtSinGjx+Pt7c3NjY2REREcPToUYM6GRkZREZG4ujoiLOzM88//zx5eXkGdfbt28fdd9+NtbU1fn5+TJky5ZJYfvrpJ4KCgrC2tqZt27asXr261j/vrTRp0iRCQ0NxcHDAw8ODAQMGGKxHDbq5rqOionBzc8Pe3p5HH32UtLQ0gzrJycn07dsXW1tbPDw8eOONNwyWswTYuHEjHTt2xMrKihYtWjBv3rxL4mmIvwOzZ8+mXbt2ODo64ujoSFhYGGvWrNFvl/Nbuz777DM0Go3+/niQc3xDjLwoiElavHix0mq16n//+5/6559/1IsvvqicnZ1VWlqasUMzutWrV6t3331XLVu2TAFq+fLlBts/++wz5eTkpFasWKH27t2rHn74YdW0aVNVWFior9OrVy/Vvn17tX37dvXXX3+pFi1aqCeeeEK/PTs7W3l6eqrIyEh14MABtWjRImVjY6O+/vprfZ2tW7cqc3NzNWXKFHXw4EH13nvvKUtLS7V///46Pwd1pWfPnmru3LnqwIEDKi4uTvXp00f5+/urvLw8fZ1XXnlF+fn5qXXr1qmdO3eqO++8U3Xr1k2/vaysTLVp00ZFRESoPXv2qNWrV6tGjRqpcePG6eucOHFC2draqrFjx6qDBw+qmTNnKnNzc/X777/r6zTU34GVK1eq3377TR05ckQdPnxYvfPOO8rS0lIdOHBAKSXntzbFxMSoJk2aqHbt2qnRo0fry+Uc15wk6svo0qWLioqK0r8uLy9XPj4+atKkSUaMyvRcnKgrKiqUl5eX+vzzz/VlWVlZysrKSi1atEgppdTBgwcVoGJjY/V11qxZozQajTp16pRSSqn//ve/ysXFRb/usFJKvfXWW6ply5b610OGDFF9+/Y1iKdr167q5ZdfrtXPaEzp6ekKUJs2bVJK6c6lpaWl+umnn/R14uPjFaC2bdumlNJ9kTIzM1Opqan6OrNnz1aOjo768/nmm2+q1q1bG7zXY489pnr27Kl/fTv9Dri4uKhvv/1Wzm8tys3NVYGBgSo6Olrde++9+kQt5/jGSNf3RUpKSti1axcRERH6MjMzMyIiIti2bZsRIzN9CQkJpKamGpw7Jycnunbtqj9327Ztw9nZmc6dO+vrREREYGZmxo4dO/R17rnnHrRarb5Oz549OXz4MJmZmfo61d/nQp2G9H+UnZ0NgKurKwC7du2itLTU4HMHBQXh7+9vcH7btm2Lp6envk7Pnj3Jycnhn3/+0de52rm7XX4HysvLWbx4Mfn5+YSFhcn5rUVRUVH07dv3kvMg5/jGyFzfFzl37hzl5eUGPyQA
np6eHDp0yEhR1Q+pqakAlz13F7alpqbi4eFhsN3CwgJXV1eDOk2bNr3kGBe2ubi4kJqaetX3qe8qKioYM2YM4eHhtGnTBtB9dq1Wi7Ozs0Hdi8/v5c7LhW1Xq5OTk0NhYSGZmZkN+ndg//79hIWFUVRUhL29PcuXL6dVq1bExcXJ+a0FixcvZvfu3cTGxl6yTX6Gb4wkaiFMUFRUFAcOHGDLli3GDqXBadmyJXFxcWRnZ/Pzzz8zdOhQNm3aZOywGoSUlBRGjx5NdHS0wTrn4uZI1/dFGjVqhLm5+SWjENPS0vDy8jJSVPXDhfNztXPn5eVFenq6wfaysjIyMjIM6lzuGNXf40p1GsL/0ciRI1m1ahUbNmwwWM7Ry8uLkpISsrKyDOpffH5v9Nw5OjpiY2PT4H8HtFotLVq0oFOnTkyaNIn27dvzxRdfyPmtBbt27SI9PZ2OHTtiYWGBhYUFmzZtYsaMGVhYWODp6Snn+AZIor6IVqulU6dOrFu3Tl9WUVHBunXrCAsLM2Jkpq9p06Z4eXkZnLucnBx27NihP3dhYWFkZWWxa9cufZ3169dTUVFB165d9XU2b95MaWmpvk50dDQtW7bExcVFX6f6+1yoU5//j5RSjBw5kuXLl7N+/fpLuv87deqEpaWlwec+fPgwycnJBud3//79Bl+GoqOjcXR0pFWrVvo6Vzt3t9vvQEVFBcXFxXJ+a0H37t3Zv38/cXFx+kfnzp2JjIzUP5dzfAOMPZrNFC1evFhZWVmpefPmqYMHD6qXXnpJOTs7G4xCvF3l5uaqPXv2qD179ihATZs2Te3Zs0clJSUppXS3Zzk7O6tffvlF7du3T/Xv3/+yt2eFhISoHTt2qC1btqjAwECD27OysrKUp6enevrpp9WBAwfU4sWLla2t7SW3Z1lYWKipU6eq+Ph49cEHH9T727OGDx+unJyc1MaNG9WZM2f0j4KCAn2dV155Rfn7+6v169ernTt3qrCwMBUWFqbffuHWlh49eqi4uDj1+++/K3d398ve2vLGG2+o+Ph4NWvWrMve2tIQfwfefvtttWnTJpWQkKD27dun3n77baXRaNSff/6plJLzWxeqj/pWSs7xjZBEfQUzZ85U/v7+SqvVqi5duqjt27cbOySTsGHDBgVc8hg6dKhSSneL1vvvv688PT2VlZWV6t69uzp8+LDBMc6fP6+eeOIJZW9vrxwdHdWzzz6rcnNzDers3btX3XXXXcrKyko1btxYffbZZ5fEsmTJEnXHHXcorVarWrdurX777bc6+9y3wuXOK6Dmzp2rr1NYWKhGjBihXFxclK2trRo4cKA6c+aMwXESExNV7969lY2NjWrUqJH617/+pUpLSw3qbNiwQXXo0EFptVrVrFkzg/e4oCH+Djz33HMqICBAabVa5e7urrp3765P0krJ+a0LFydqOcc1p1FKKeO05YUQQghxLXKNWgghhDBhkqiFEEIIEyaJWgghhDBhkqiFEEIIEyaJWgghhDBhkqiFEEIIEyaJ+iqKi4uZMGECxcXFxg6lQZLzW7fk/NY9Ocd1S86vjtxHfRU5OTk4OTmRnZ2No6OjscNpcOT81i05v3VPznHdkvOrIy1qIYQQwoRJohZCCCFMWINfj7qsrIw9e/bg6emJmVnNvpfk5uYCcOrUKXJycuoivNuanN+6Jee37sk5rlsN+fxWVFSQlpZGSEgIFhZXT8UN/hp1bGwsXbp0MXYYQgghxCViYmIIDQ29ap0G36L29PQEdCfD29vbyNEIIYQQcObMGbp06aLPUVfT4BP1he5ub29vfH19jRyNEEIIUeV6LskadTDZ5s2b6devHz4+Pmg0GlasWGGwXSnF+PHj8fb2xsbGhoiICI4ePWqcYIUQQggjMGqizs/Pp3379syaNeuy26dMmcKMGTP46quv2LFjB3Z2dvTs2ZOioqJbHKkQQghhHEbt+u7duze9e/e+7DalFNOnT+e9996jf//+APzf//0f
np6erFixgscff/xWhiqEEEIYhcleo05ISCA1NZWIiAh9mZOTE127dmXbtm1XTNTFxcUG081dGN4vhBB1rby8nNLSUmOHIUyApaUl5ubmtXIsk03UqampAJeMiPP09NRvu5xJkyYxceLEOo1NCCGqU0qRmppKVlaWsUMRJsTZ2RkvLy80Gs1NHcdkE/WNGjduHGPHjtW/PnXqFK1ataqdg5eXwbqJ0OxeaBFx7fpCiNvChSTt4eGBra3tTf9hFvWbUoqCggLS09MBbvrWYJNN1F5eXgCkpaUZfMi0tDQ6dOhwxf2srKywsrLSv67V2Wxivoa/Z8CeH+CljeDSpPaOLYSol8rLy/VJ2s3NzdjhCBNhY2MDQHp6Oh4eHjfVDW6yc303bdoULy8v1q1bpy/Lyclhx44dhIWF3fJ4ysormJV3L0cs7oDCTPjxKSgpuOVxCCFMy4Vr0ra2tkaORJiaCz8TNztuwaiJOi8vj7i4OOLi4gDdALK4uDiSk5PRaDSMGTOGjz/+mJUrV7J//36eeeYZfHx8GDBgwC2PNaOghDl/n2Zo3qsUWLhA6n5Y9Ro07BlYhRDXSbq7xcVq62fCqIl6586dhISEEBISAsDYsWMJCQlh/PjxALz55pu8+uqrvPTSS4SGhpKXl8fvv/+OtbX1LY/Vw8GaTwe25QxuvFAwAqUxh32LIeabWx6LEEKI24dRE/V9992HUuqSx7x58wDdt5EPP/yQ1NRUioqKWLt2LXfccYfR4u3bzptHQhrzd0VrZlk8oyv8YxwkbTNaTEIIYQqaNGnC9OnTr7v+xo0b0Wg0dT5Sft68eTg7O9fpe9Q1k71Gbaom9G9NY2cbpuZGEOf0AFSUwU9DIeeMsUMTQojrdt999zFmzJhaO15sbCwvvfTSddfv1q0bZ86cwcnJqdZiaKgkUdeQo7Ul04a0R6PR8ETaU+Q63gF5abpkXVZi7PCEEKLWKKUoKyu7rrru7u41GlCn1Wpr5R7j24Ek6hvQtZkbL9/TnEKsicwdSYWVI6TsgD/eMXZoQghxTcOGDWPTpk188cUXaDQaNBoNiYmJ+u7oNWvW0KlTJ6ysrNiyZQvHjx+nf//+eHp6Ym9vT2hoKGvXrjU45sVd3xqNhm+//ZaBAwdia2tLYGAgK1eu1G+/uOv7Qhf1H3/8QXBwMPb29vTq1YszZ6p6K8vKyhg1ahTOzs64ubnx1ltvMXTo0BoPMJ49ezbNmzdHq9XSsmVLfvjhB/02pRQTJkzA398fKysrfHx8GDVqlH77f//7XwIDA7G2tsbT05NBgwbV6L1vhCTqGzT2wTto5e3IvsJGfOH4pq4w9huIW2jcwIQQRqeUoqCk7JY/1HXehfLFF18QFhbGiy++yJkzZzhz5gx+fn767W+//TafffYZ8fHxtGvXjry8PPr06cO6devYs2cPvXr1ol+/fiQnJ1/1fSZOnMiQIUPYt28fffr0ITIykoyMjCvWLygoYOrUqfzwww9s3ryZ5ORkXn/9df32yZMns2DBAubOncvWrVvJycm5ZNXFa1m+fDmjR4/mX//6FwcOHODll1/m2WefZcOGDQAsXbqU//znP3z99dccPXqUFStW0LZtW0A3AHrUqFF8+OGHHD58mN9//5177rmnRu9/I0x2whNTp7UwY/rjHXho5ha+SGnGA62H0/74bPjjXQjuB1YOxg5RCGEkhaXltBr/xy1/34Mf9sRWe+0/605OTmi1WmxtbfWTS1X34Ycf8uCDD+pfu7q60r59e/3rjz76iOXLl7Ny5UpGjhx5xfcZNmwYTzzxBACffvopM2bMICYmhl69el22fmlpKV999RXNmzcHYOTIkXz44Yf67TNnzmTcuHEMHDgQgC+//JLVq1df8/NWN3XqVIYNG8aIESMA3d1G27dvZ+rUqdx///0kJyfj5eVFREQElpaW+Pv706VLFwCSk5Oxs7PjoYcewsHBgYCAAP1dS3VJWtQ34Q5PB97uFQTA40fu
JrvNUBj6qyRpIUS91rlzZ4PXeXl5vP766wQHB+Ps7Iy9vT3x8fHXbFG3a9dO/9zOzg5HR0f9tJqXY2trq0/SoJt680L97Oxs0tLS9EkTwNzcnE6dOtXos8XHxxMeHm5QFh4eTnx8PACDBw+msLCQZs2a8eKLL7J8+XL9dfoHH3yQgIAAmjVrxtNPP82CBQsoKKj7ia+kRX2ThnVrwvpD6Ww5do6nU4ew1L0VlsYOSghhVDaW5hz8sKdR3rc22NnZGbx+/fXXiY6OZurUqbRo0QIbGxsGDRpEScnVB9BaWhr+NdRoNFRUVNSo/vV259cWPz8/Dh8+zNq1a4mOjmbEiBF8/vnnbNq0CQcHB3bv3s3GjRv5888/GT9+PBMmTCA2NrZObwGTFvVNMjPTMHVwe5xsLNl3MpsZ647qNqTEwJbpRo1NCGEcGo0GW63FLX/UZAS1VqulvLz8uupu3bqVYcOGMXDgQNq2bYuXlxeJiYk3eHZujJOTE56ensTGxurLysvL2b17d42OExwczNatWw3Ktm7darB4k42NDf369WPGjBls3LiRbdu2sX//fgAsLCyIiIhgypQp7Nu3j8TERNavX38Tn+zapEVdC7ycrPlkYBtGLtzDrA3H6OFTRNtlfaCiFDyC4Y5b/81aCCGupkmTJuzYsYPExETs7e1xdXW9Yt3AwECWLVtGv3790Gg0vP/++1dtGdeVV199lUmTJtGiRQuCgoKYOXMmmZmZNfqC8sYbbzBkyBBCQkKIiIjg119/ZdmyZfpR7PPmzaO8vJyuXbtia2vL/PnzsbGxISAggFWrVnHixAnuueceXFxcWL16NRUVFbRs2bKuPjIgLepa81A7HwaGNKZCQdTqDEo6vwStBkBA+DX3FUKIW+3111/H3NycVq1a4e7uftXrzdOmTcPFxYVu3brRr18/evbsSceOHW9htDpvvfUWTzzxBM888wxhYWHY29vTs2fPGk0rPWDAAL744gumTp1K69at+frrr5k7dy733XcfoFtD+ptvviE8PJx27dqxdu1afv31V9zc3HB2dmbZsmU88MADBAcH89VXX7Fo0SJat25dR59YR6Nu9QWAW+zkyZP4+fmRkpKCr69vnb5XTlEpvaf/xamsQh7v5MNngzqA3MwvRINWVFREQkICTZs2Nco6BLeziooKgoODGTJkCB999JGxw7nE1X42apKbpEVdixytLfn3kPZoNLB412n+OJim26AUHFwJRugqEkKIhiIpKYlvvvmGI0eOsH//foYPH05CQgJPPvmksUOrU5Koa9mdzdx46e5mAIxbtp/03CJY/goseRq2TDNydEIIUX+ZmZkxb948QkNDCQ8PZ//+/axdu5bg4GBjh1anZDBZHRjb4w42Hz1H/Jkc3vp5H/9r1w3NvsWw/mPw6QAtIowdohBC1Dt+fn6XjNi+HUiLug5YWZgz/bEOaC3M2HD4LAtK74NOwwAFPz8PGQlGjlAIIUR9IYm6jrT0cuDNnroh+5/8Fs+J0PHQuBMUZcGPT0NJ3c9mI4QQov6TRF2HngtvSngLNwpLy3nt53hKB30Pdu6Qth9+Ha0bZCaEEEJchSTqOnRh1jJHawv2nsxmZmwBDJ4HGnPYvwRi5hg7RCGEECZOEnUd83ay4ZOBuiXSvtxwjF2a1tDjY93GP96BpL+NGJ0QQghTJ4n6FujX3ocBHXyoUDB2SRz5IS9Cm0FQUQZLhkLOmWsfRAghxG1JEvUtMrF/G3ycrEk6X8BHv8XDwzPAozXkp8OSZ6Ds6qvQCCGEqWnSpAnTp0+/4vZhw4YxYMCAWxZPQyWJ+hZxsrHk30M66GYti00h+lgePD4frJ3gZAz8+a6xQxRCCGGCJFHfQmHN3Xixctayt5fu46xlY3j0O7D30i3gIYQQQlxEEvUt9q8edxDk5cD5/BLeWroP1SICRu2BJrLKlhDi1pgzZw4+Pj6XLFXZv39/nnvuOQCOHz9O//798fT0xN7entDQ
UP1SkDequLiYUaNG4eHhgbW1NXfddZfB+tKZmZlERkbi7u6OjY0NgYGBzJ07F4CSkhJGjhyJt7c31tbWBAQEMGnSpJuKp76QRH2LWVmYM/3xDmjNzVh/KJ2FMcmgta2qkBILh34zXoBCiNpTkl/zR3lZ1f7lZbqy0sJrH7cGBg8ezPnz59mwYYO+LCMjg99//53IyEgA8vLy6NOnD+vWrWPPnj306tWLfv36XXU5zGt58803Wbp0Kd9//z27d++mRYsW9OzZk4yMDADef/99Dh48yJo1a4iPj2f27Nk0atQIgBkzZrBy5UqWLFnC4cOHWbBgAU2aNLnhWOoTk57ru7y8nAkTJjB//nxSU1Px8fFh2LBhvPfeezVaKNzUBHk58mavlnz8Wzwfr4onrJkbzdztIf0Q/DAAyorhmV+klS1EffepT833GTwPWg/UPT/0K/w0DALugmerfYGf3hYKzhvuNyH7ut/CxcWF3r17s3DhQrp37w7Azz//TKNGjbj//vsBaN++Pe3bt9fv89FHH7F8+XJWrlzJyJEja/yx8vPzmT17NvPmzaN3794AfPPNN0RHR/Pdd9/xxhtvkJycTEhICJ07dwYwSMTJyckEBgZy1113odFoCAgIqHEM9ZVJt6gnT57M7Nmz+fLLL4mPj2fy5MlMmTKFmTNnGju0m/ZceFO6Na+ctWzJXkrLK8CtBQT2gIAw3eIdQghRRyIjI1m6dCnFxcUALFiwgMcffxwzM11ayMvL4/XXXyc4OBhnZ2fs7e2Jj4+/4Rb18ePHKS0tJTy8qgFiaWlJly5diI+PB2D48OEsXryYDh068Oabb/L331XzTAwbNoy4uDhatmzJqFGj+PPPP2/0o9c7Jt2i/vvvv+nfvz99+/YFdN+uFi1aRExMjJEju3kXZi3rNX0ze1Oy+HL9MV578A54ZI7u/mpLG2OHKIS4We+crvk+5lZVz4P66Y6huahNNWb/zcUF9OvXD6UUv/32G6Ghofz111/85z//0W9//fXXiY6OZurUqbRo0QIbGxsGDRpESUnd3Urau3dvkpKSWL16NdHR0XTv3p2oqCimTp1Kx44dSUhIYM2aNaxdu5YhQ4YQERHBzz//XGfxmAqTblF369aNdevWceTIEQD27t3Lli1b9N0ml1NcXExOTo7+kZube6vCrTEfZxs+GtAG0M1atic5E8wtq5K0UrD5c0jcYsQohRA3TGtX84d5tfaTuYWu7OIv7pfbr4asra155JFHWLBgAYsWLaJly5Z07NhRv33r1q0MGzaMgQMH0rZtW7y8vEhMTLzBEwHNmzdHq9UaLFNZWlpKbGwsrVq10pe5u7szdOhQ5s+fz/Tp05kzp2qqZUdHRx577DG++eYbfvzxR5YuXaq/vt2QmXSL+u233yYnJ4egoCDMzc0pLy/nk08+0Q92uJxJkyYxceLEWxjlzenfoTHr4tNZufc0L/2wi0UvdqWFh4Nu497KNawt7eDpZeB/p3GDFUI0KJGRkTz00EP8888/PPXUUwbbAgMDWbZsGf369UOj0fD+++9fMkq8Juzs7Bg+fDhvvPEGrq6u+Pv7M2XKFAoKCnj++ecBGD9+PJ06daJ169YUFxezatUqgoODAZg2bRre3t6EhIRgZmbGTz/9hJeXF87OzjccU31h0i3qJUuWsGDBAhYuXMju3bv5/vvvmTp1Kt9///0V9xk3bhzZ2dn6x8GDB29hxDfmowFtCPJy4GxuMY/P2c7h1MpegNYDoNl9UJoP8wfByV3GDFMI0cA88MADuLq6cvjwYZ588kmDbdOmTcPFxYVu3brRr18/evbsadDivhGfffYZjz76KE8//TQdO3bk2LFj/PHHH7i4uACg1WoZN24c7dq145577sHc3JzFixcD4ODgwJQpU+jcuTOhoaEkJiayevVq/TX1hkyjlOmutejn58fbb79NVFSUvuzjjz9m/vz5HDp06LqOcfLkSfz8/EhJScHX17euQr1pGfkl
PPXtDg6eycHVTsv857vSysdRt271wiGQ+BdYOcHQlTLQTAgTUlRUREJCAk2bNsXa2trY4QgTcrWfjZrkJpP+KlJQUHDJtyVzc/Ob6n4xVa52Wha+2JW2jZ3IyC/hyW+3c+BUtu4e6ycWg38YFGfD//WH1JsfSCKEEKJ+MOlE3a9fPz755BN+++03EhMTWb58OdOmTWPgwIHGDq1OONtqmf9CVzr4OZNVUMqT32xnb0oWWNlD5E/gGwpFWbpknWb6XfpCCCFunkkn6pkzZzJo0CBGjBhBcHAwr7/+Oi+//DIfffSRsUOrM042lvzwfBc6B7iQU1TGU9/uYFdSJlg5QOTP4BOim+jg/x6Gs0eMHa4QQog6ZtKJ2sHBgenTp5OUlERhYSHHjx/n448/RqvVGju0OuVgbcn3z3WhS1NXcovLeOa7HcQkZICNMzy1DLzaQv5Z+L4fnD9u7HCFEELUIZNO1LczOysL5j0bSrfmbuSXlDP0fzFsO34ebF3h6V/AoxXkpeqSdUaCscMVQghRRyRRmzBbrQX/GxbK3YGNKCwt59l5MWw5eg7s3OCZldCoJeSc0l2zvnjSfiHELdUQB7mKm1NbPxMmPeGJAGtLc755pjPD5+9iw+GzPPd9LHOe7sR9LT10t2p9/zDc/S+ZclQII9FqtZiZmXH69Gnc3d3RarX1etEgcfOUUpSUlHD27FnMzMxu+nKtSd9HXRvqy33U11JcVs7IhXuIPpiG1tyM2U91pHuwJ5SVgEXDvmYvhKkrKSnhzJkzFBQUGDsUYUJsbW3x9va+bKKuSW6SFnU9YWVhzqwnOzJ68R7WHEjllfm7+PLJjvRs7VVVKTcVfvsXPPQfsPcwXrBC3Ga0Wi3+/v6UlZVRXl5u7HCECTA3N8fCwqJWelckUdcjWgszZjwRwms/xrFq3xmiFuxmxhMh9Gnrrauw/GU4sRHKiuCppUaNVYjbjUajwdLSEktLS2OHIhoYGUxWz1iamzH9sQ4MDGlMWYXi1UV7+CXulG5j32ng1xX6/tu4QQohhKg10qKuhyzMzZg6uD3mZhp+3nWS136Mo7xC8UjH5vDcH1C9q0Upw9dCCCHqFWlR11PmZhqmPNqOJ7r4UaHgXz/tZUlsimFSPrQa5vWFohzjBSqEEOKmSKKux8zMNHwyoC1P3xmAUvDm0n0s3JGs21hSAKvGQNJWWDBYZjATQoh6ShJ1PWdmpuHD/q15NrwJAO8s38//bUvUrbr15BKwdoKU7TCzI3zXE3b/n7SwhRCiHpFE3QBoNBrGP9SKl+5pBsD4X/7huy0JunWrh/0GLR4EjZkuYa98Ff7dEpa9DCc2gcymJIQQJk0GkzUQGo2Gcb2DsDTXMGvDcT5adZCy8gpevrctPPUz5JyBfYshbiGcO6J7vm8xOPlDhyegw5Pg0sTYH0MIIcRFpEXdgGg0Gl7v0ZLR3QMBmLTmEF+uP6rb6OgNd70GUTHw/Fro9CxYOUF2MmyaDF+0h+gPjBi9EEKIy5FE3cBoNBpee/AO/vXgHQBM/fMI/4k+gn6mWI0G/EKh33R4/TA8+h00ux/QgHf7qgPlpkHS37rbu4QQQhiNJOoG6tXugbzdOwiAL9Yd5bE529l+4rxhJUsbaDsInlkBrx2Aln2qtu35P5jbG5a9eOuCFkIIcQlJ1A3YK/c2Z0K/VmgtzIhJyODxOdt58pvt7EzMuLSyky9YWle9Li8FrX1la7tS/jnY95MsqSmEELeQrJ51GziTXch/NxxncWwypeW6/+577nDntYhAQvxdrrxjcR6YWVQl8G2z4I93wMoR2jwCHSLBN1RmPhNCiBqqSW6SRH0bOZlZwKwNx/lpZwplFbr/9geCPHgt4g7a+jpd+wC75sHmf+sGoF1g7QQ2LmDtDDbOl/7r3R6aP6CrqxRkJlZtlwQvhLhNSaKuRhL1pZLPFzBz/VGW7TlFeWXCfrCVJ2MiAmntc42EXVEBSVtgzwI4+AuUXaMbPORp
6P+l7nlxLkyq/D9454xuUhaAjZ/pBq5dSOAXkr+tK9g2ArtGlf+6SYIXQjQIsh61uCp/N1s+H9yeEfe3YOa6o6yIO0X0wTSiD6bRu40XYyLuoKWXw+V3NjODpvfoHg9Ng+yTUJgFRVmX/9c/rGrfohywsAFVrhvIdsGZvZCw6fqCN7MAWzdoPRB6T9aVKQWbp4Kti647/sKxS/LB3ArM5cdcCFF/SYtacCw9jy/WHWXVvtP6xbYeaufD6O6BtPCwr/03LCsBC23V65RYyEwwTPCFmVBwHgrO6QaxFWRASW7VPh2fgYdn6p4X5cBnfrrn1VvqK0boJnixca7WMnfTvTa3AnPLyocWzCqfewRDUN+q94lbpCsP6lv1BeDcMchL0+1nbmG4v5WjrjfATMZpCiGuTFrUokZaeNgz84kQRt7fgi/WHWH1/lR+3Xua3/adpn+HxozqHkjTRna194bVkzTo7uv2C732fqVFVclbW+0LhCqHTsN0CftCkgZdXZQu6Rdmwvmj136P1gOrEnVFBax4Rff8jRNViXr7LNj5vysfQ2NW2XVf7ctB4466CWcuSN4BWjtoFAgWVteOSwhR+4qydXexlBZCWdG1/7X3hHZDbnmYkqiFXksvB/4b2YmDp3OYvvYIfx5MY/meU6zce5pHQhrz6gOB+LvZXvtAdcXSGpwa6x7V2bhAvy8urf/YAijMqGyRV2udF2VBeRlUlEJ5ie55eYnutU9I1f6qHFpE6G5Vq55M7dzBLbByn8p9yyuPVVoAqqLy/c7DucO6fUoLDBP1/Ed1PQQjd0GjFrqyHV/D/p+rkrv+2nzlaysHXXLX2useVvZgYS3X7IVpqqjQ/a4VZFT9PhScq3pemKm7bNXtVV1PFkDCZtj9g26dgrCoqmMtfUH3u6YUoKpNxFTt+YVtoKsbPgaahOteH/kDVr2m+/1+fEHVcb/ooPsbcb387pRELUxDKx9H5jzTmf0ns5m+9gjrDqXz066TLN9zisGdfYm6vwW+LkZM2NfL3ALsPXSPG9rfEp5aemn5/e/oHpdTXqr7I5R/ruqPUv55cPCqVqdMd996/lndALkLzh6CkzE1i9G/Gzy3pur1/EG6b/4PzwTXprqyExt1g/W09rpEb+VQ7Xll0re0rfwSYKfrypfkL6qrPrMhwLmjcGqX7ue4yV26sqJsWPREtaScofuyey1tB1Ul6vPHYf8S3fiS6on6wLLrO1Z1bQZVPa8og5xT4OBtWMfSBgo1un8trK/yr7VufE2jO2oWQy0x+UR96tQp3nrrLdasWUNBQQEtWrRg7ty5dO7c2dihNXhtfZ34blgoe5Iz+c/ao2w+cpZFMSn8vOskj4X6EXV/C7ydbK59oNuJuaUuKVdPzJfUsYCo7ZeWd3lZN8FMwTldctdfn69M+MV5uj9gJflQmq/bR3vRF6bk7bqWuqq2KtqJTbBl2vV/BjML8OkIL0RXlS17WdfyePAj8NDNeEdKLCRsvCjR2+tiuvDcXFv5qDYeQFuLl1FuRkVFVU9KeeWjohTKiisfRbp/A6oNiEzYDOeP6VpWnq10ZeeOQew3ld2jxbo7IcqKL31dVoR+EAgaeGGtrrcEYONkXYIKfRHurLzckpEAix6vfGNNtS9PFz+/6HMN/h7cmuuex36nu0zTagDc+4aurDAT5lbOQmgwRKna8+ot1uI83c/fc2ugcSdd8ZHf4c/3oO2QqkRtaQtJWy89z1aOVXdw2LpVPlwrx3JYgGuzqrq+odDjE8MygF6TDM/dhc9v8LpauZmF4eW0gG7w4gbd+JTqRsXpfi5N/IupSSfqzMxMwsPDuf/++1mzZg3u7u4cPXoUF5erTNIhal2Ivwv/91wXdiZm8J+1R9h67DzztyezZOdJngj148V7mtWPFrap8wiqSoLXUlGu606vKDMsH/Sd7ja46l8UfDtD5+crk3ye7lE96ZfkQkkBlBdXHrsMgz/aAIl/6Vok1XsSkrbC+o9r9hmd
/OG1/VWv/9cb0v/RJZfmlbPgHVwJGz6tSuzVk7y5VvdH2MyiMsFWXnqwtDHs0vxlJKTsgB4fwx09dWWHVsOyl6qSs7rOJV7HZ4CZue75zv/BP8uh95SqRJ2XBju+qtl5AMP3zz+r+wJQUG2a37JiXS9LTZUVGx437QD4dakqq6iA9IM1P25BtS5it0Bodl9VSxh0/0dDfqgcvFmZkG1cLx2TcjVebXSPi3V9uebxVmfjAo0vkzdqEpsRmXSinjx5Mn5+fsydO1df1rRpUyNGdHvr3MSVBS/cyfYT55kWfYSYhAy+35bE/B3J9G/vw8v3Nr/ybV2idpmZ67qwL3YhKVUX1NdwJPuVlJfpWuol+ZcmsT6f61pizgFVZZ6tdaPv9Qm/2qO0QPeFoKykaiwA6P6YV1eco+syra7gPJyNv3a81Vk5Gr7OPqlbzrUwy7C8+p0Dl2NmqRuPYGFd1eVZXlqVqBt30r129q/ax9kf7v6Xrmv0wr6W1lXHuPDa3Eo30PDClyCbaokjbIRutj+naqN/nf1g6CqqrsNedC3WoIyqlrWzX9Ux2g3RJWnHauM6rBzgmZVVrw1ak5pLy7V2uqRrX+3LX8teusfFWj18aZm4aSZ9e1arVq3o2bMnJ0+eZNOmTTRu3JgRI0bw4otXXiiiuLiY4uKqb5SnTp2iVatWcntWLVNKse34ef678Thbjp3Tl0cEezD8vuZ0CnA1YnTC5Cil6wWoKDOcUz77lK6L2NG7qks854xuEN6F1vLlBu1VlOsuIVy4Lc7CWpfoLjizT9ez0OgOsHfXlRXnVbutzrJy32q315mZm3wXqGg4GszMZNbWul/osWPHMnjwYGJjYxk9ejRfffUVQ4cOvew+EyZMYOLEiZeUS6KuO/tOZvHVpuOsOZCqv7TVpYkrw+9rzn0t3dHIHz8hhDDQYBK1Vqulc+fO/P333/qyUaNGERsby7Zt2y67j7SojefE2TzmbD7B0t0n9Yt/BHk5MPy+5vRt642FuUwCIoQQULNEbdJ/Ob29vWnVqpVBWXBwMMnJyVfYA6ysrHB0dNQ/HBzkmumt0szdns8ebcdfbz7AS/c0w05rzqHUXEYvjuO+qRv5YVsiRaU1vMVCCCFuczeUqFNSUjh58qT+dUxMDGPGjGHOnDm1FhhAeHg4hw8fNig7cuQIAQEBV9hDmAIvJ2ve6RPM32935/Ued+Bmp+VkZiHv//IP4Z+tZ9aGY2QXlho7TCGEqBduKFE/+eSTbNiwAYDU1FQefPBBYmJiePfdd/nwww9rLbjXXnuN7du38+mnn3Ls2DEWLlzInDlziIqKuvbOwuicbC0Z+UAgW956gA/7t6axsw3n80v4/I/DhH+2nkmr40nLKTJ2mEIIYdJu6Bq1i4sL27dvp2XLlsyYMYMff/yRrVu38ueff/LKK69w4sSJWgtw1apVjBs3jqNHj9K0aVPGjh171VHfF5NFOUxHaXkFv+07w+yNxzmcprtNRmtuxqOdGvPSPc1rdz5xIYQwYXW+KEdpaSlWVrq5j9euXcvDD+vunQsKCuLMmTM3csgreuihh3jooYdq9ZjCOCzNzRgQ0pj+HXzYcDid2RuPE5uYyaKYFBbHptCnjTev3Nuctr7XWBNbCCFuIzfU9d26dWu++uor/vrrL6Kjo+nVS3fj++nTp3Fzc7vG3uJ2p9FoeCDIk59e6cZPr4TRPcgDpeC3/Wfo9+UWnvp2B1uPnaOiwmRvSBBCiFvmhlrUkydPZuDAgXz++ecMHTqU9u3bA7By5Uq6dOlyjb2FqBLaxJXQYa4cTs3l603H+WXvabYcO8eWY+ewsTSnmbsdzd3taeFR9WjiZofWwqRvWBBCiFpzw/dRl5eXk5OTYzDvdmJiIra2tnh43OBqRXVArlHXLykZBXy3JYEfY1MovMKtXOZmGgJcbWl2UQJv7m6Hg7XlZfcRQghTUucTnhQWFqKUwtZWtxBDUlISy5cvJzg4mJ49LzPXsBFJ
oq6fysorSM4o4Fh6HsfO5nE8Pb/y3zzyisuuuJ+XozXNPexoUZnEm1cmcXd7K5khTQhhMup8MFn//v155JFHeOWVV8jKyqJr165YWlpy7tw5pk2bxvDhw28ocCEusDA3o5m7Pc3c7elRrVwpRVpOsS6Bp+dy/Gy+PpmfzS0mNaeI1Jwith47b3A8R2sLXdJ2t6e1jyMDQhrjbFs/Vs4RQtzebqhF3ahRIzZt2kTr1q359ttvmTlzJnv27GHp0qWMHz+e+PgarnxTh6RFffvILijVtborW94XEnhKRgEXj0uzsTRncGdfngtvShO5LUwIcYvVeYu6oKBAPzXnn3/+ySOPPIKZmRl33nknSUlJN3JIIW6ak60lnQJc6BRguO5sUWk5iecrW97pefzxTxrxZ3L4v21J/LA9iQeDPXnxnmZ0DnCR7nEhhMm5oUTdokULVqxYwcCBA/njjz947bXXAEhPT8fR0fEaewtxa1lbmhPk5UiQl+5nc3T3QLYdP883f51gw+Gz/HkwjT8PptHe14kX7m5G7zZesoCIEMJk3NBfo/Hjx/P666/TpEkTunTpQlhYGKBrXYeEhNRqgELUNo1GQ7cWjZj7bBfWjr2HJ7r4obUwY+/JbF5dtId7P9/It3+dIKdI5iMXQhjfDd+elZqaypkzZ2jfvj1mZrp8HxMTg6OjI0FBQbUa5M2Qa9TiepzLK2b+9iR+2JbE+fwSAOytLHg81I9h4U3wdbE1coRCiIbklq5HfWEVLVNNgpKoRU0UlZazYs8pvt2SwLH0PEB333bvNl68cHczOvg5GzdAIUSDUOfrUVdUVPDhhx/i5OREQEAAAQEBODs789FHH1FRUXFDQQthCqwtzXm8iz9/jrmHuc+GEt7CjfIKxap9ZxgwayuDv/qbP/5JpVymNxVC3CI3NJjs3Xff5bvvvuOzzz4jPDwcgC1btjBhwgSKior45JNPajVIIW41MzMN97f04P6WHhw8ncO3W07w697TxCZmEpu4iyZutjx3V1MGdfLFVntDv0ZCCHFdbqjr28fHh6+++kq/atYFv/zyCyNGjODUqVO1FuDNkq5vUVvScor4/u9EFuxIJrtQN9DMycaSyK7+DO3WBE9HayNHKISoL+q86zsjI+OyA8aCgoLIyMi4kUMKYfI8Ha15s1cQ28Y9wIf9WxPgZkt2YSn/3XicuyavZ+ySOLafOE/RFeYoF0KIG3FDfXbt27fnyy+/ZMaMGQblX375Je3atauVwIQwVbZaC54Ja0Jk1wDWxqfx3V8JxCRmsGz3KZbtPoXW3Ix2vk50aepKaFNXOgW44CiLhQghbtANJeopU6bQt29f1q5dq7+Hetu2baSkpLB69epaDVAIU2VupqFnay96tvYiLiWL//s7kb+OneNsbjE7kzLZmZQJG49jpoEgL0dd4m7iSmhTFzwcpJtcCHF9bvj2rNOnTzNr1iwOHToEQHBwMC+99BIff/wxc+bMqdUgb4Zcoxa3klKKpPMFxCRkEJOYQWxiBknnCy6p17SRHaFNXAht4kqXpq74u9rK9KVC3EZu6X3U1e3du5eOHTtSXm461+gkUQtjS8spIjYxQ5e8EzI4nJbLxb91Hg5WdGnqqm91t/R0wMxMErcQDVWdL8ohhLh+no7WPNTOh4fa+QCQXVjKrqQMYhIyiU3MYN/JLNJzi1m17wyr9p0BdMtydm7iWtnidqFtY2e0FjL/uBC3I0nUQtxiTjaWPBDkyQNBnoBuNrQ9yVnEVnaV70rKJKeojPWH0ll/KB3QLcs5qJMvw+9rjo+zjTHDF0LcYpKohTAya0tzwpq7EdbcDYCy8goOnsnRd5XvTMokI7+EH7Yn8WNsCo+F+knCFuI2UqNE/cgjj1x1e1ZW1s3EIoQALMzNaOfrTDtfZ164uxlKKbadOM8Xa4+yIyFDErYQt5kaJWonJ6drbn/mmWduKiAhhCGNRkO35o3o1rwR246fZ/raI5KwhbiN1Oqob1Mk
o75FQ7Tt+Hm+WHeE7Sd0MwFqzc0kYQtRj9T5FKLG8tlnn6HRaBgzZoyxQxHCqMKau7H4pTAWvXgndzZzpaS8gh+2J3Hf5xt5b8V+TmcVGjtEIUQtqTeJOjY2lq+//lqmKBWimssl7Pnbk7n38w2SsIVoIOpFos7LyyMyMpJvvvkGFxcXY4cjhMm5OGGXlitJ2EI0EPUiUUdFRdG3b18iIiKuWbe4uJicnBz9Izc39xZEKIRpkIQtRMNj8vdRL168mN27dxMbG3td9SdNmsTEiRPrOCohTJvuvuwwg0Fn87cn60eJj7ivhQw6E6KeMOkWdUpKCqNHj2bBggVYW1/fakPjxo0jOztb/zh48GAdRymE6brQwl780p2ENXOTFrYQ9ZBJ3561YsUKBg4ciLm5ub6svLwcjUaDmZkZxcXFBtsuR27PEqLK9sqJU7adOA+ApbmGQZ18ebJLAG0aO8oKXkLcIkZbPau25ebmkpSUZFD27LPPEhQUxFtvvUWbNm2ueQxJ1EJc6uKEDRDk5cCQzn4MCGmMq53WiNEJ0fA1mNWzHBwcLknGdnZ2uLm5XVeSFkJc3p3N3LjzJTdiEjKYvz2J3/9J5VBqLh+uOsikNfFEBHsypLMfdwc2wsLcpK+QCdHgmXSiFkLUrQtrYGcXlLJy7ymW7DzJ/lPZrDmQypoDqXg6WvFoR18Gd/ajaSM7Y4crxG3JpLu+a4N0fQtRMwdP5/DTrhRW7DlFZkGpvrxLE1cGd/alT1tv7KzkO74QN6PBXKOuDZKohbgxxWXlrItP56edKWw6cpaKyr8UdlpzHmrnw5BQXzr6u8gANCFuQIO5Ri2EMB4rC3P6tPWmT1tvUrOLWLr7JD/tTCHxfAE/7kzhx50pNHO3Y3AnPx7t2BgPx+u7hVIIUTPSohZCXDelFLGJmSzZmcJv+85QWFoOgLmZhvvucGdwZz8eCPJAayED0IS4Gun6rkYStRB1I6+4jN/2neannSfZmZSpL3ez0zIwpDFDQv24w9PBiBEKYbokUVcjiVqIunf8bB4/7TzJ0t0nOZtbrC9v7+fM46F+9Gvvg70MQBNCTxJ1NZKohbh1ysor2HTkLEt2prAuPp2yyhFotlpzHmrnzWOh/nT0d5YBaOK2J4PJhBBGYWFuRvdgT7oHe3Iur5jlu0+xODaZ42fzWbLzJEt2niTQw57HQv14pKOvzIAmxHWQFrUQok4ppdiVlMni2BRW7TtNUWkFoJtnvEcrLx4L9eOuFo0wM5NWtrh9SNd3NZKohTAdOUWl/Lr3ND/GprDvZLa+vLGzDUM6+zG4s68svyluC5Koq5FELYRpOng6hyU7U1i2+yQ5RWUAaDRwT6A7j4f60T3YU27zEg2WJOpqJFELYdqKSsv5459UFsekGKzm5Wan5dFOvgzp7EcLD3sjRihE7ZNEXY0kaiHqj8Rz+SzZmcLPu06SXu02r9AmLgzp7Effdt7YamUMrKj/JFFXI4laiPqnrLyCjYfPsjg2hQ2H0ymvvM3L3sqChzv48HioH20bO8ltXqLektuzhBD1moW5GRGtPIlo5UlaThE/7zrJkp0pJJ0vYOGOZBbuSKZNY0ee6hrAwx18pJUtGjRpUQsh6oWKCsX2hPMsiU1h9YFUSsp0t3k5WFnwSMfGRN4ZIFOWinpDur6rkUQtRMOTkV/Cz7tSWLAjmaTzBfryLk1cibzTn15tvLCyMDdihEJcnXR9CyEaNFc7LS/d05wX7mrG1uPnWLA9mej4NGISM4hJzMDNTsvgzn482cUffzdbY4crxE2RRC2EqLfMzDTcHejO3YHupGYXsTg2mcUxKaTmFPHVpuN8vfk49wS689SdAdzf0h0Lc7kvW9Q/0vUthGhQysorWHconQU7ktl85Ky+3NvJmie6+PN4qB8ejtZGjFAIuUZtQBK1ELevpPP5LNyRzJKdKWQWlAJgYabhwVaePHVnAGHN3GSOcWEUkqirkUQthCgqLef3A6ks2JFE
bGKmvrxpIzsiu/rzaEdfXGQlL3ELSaKuRhK1EKK6Q6k5LNiezPI9p8gr1s0xrrUw46F23jx1ZwAhfrJetqh7kqirkUQthLic/OIyfok7zfztSRw8k6MvD3CzJcTPmfaVj1bejlhbyq1eonbJ7VlCCHENdlYWPNnVnye6+BGXksX87cms2neapPMFJJ0vYEXcaUB3TTvI24F2vs508NUl7xYe9pjLtW1xi0iLWgghKmUXlrInOZO9KdnsO5nF3pNZnMsruaSerdacNo2daO/rpGt5+zrj62IjXebiujWYFvWkSZNYtmwZhw4dwsbGhm7dujF58mRatmxp7NCEEA2Qk40l97X04L6WHgAopTiVVci+k9nsTckiLiWLA6eyyS8pJyYhg5iEDP2+rnZa2vk60d7XmQ5+zrTzdcLN3spYH0U0ICadqDdt2kRUVBShoaGUlZXxzjvv0KNHDw4ePIidnZ2xwxNCNHAajQZfF1t8XWzp09YbgPIKxfGzeexN0bW496Zkcyg1h4z8EjYePsvGw1X3bvu62NDe15n2froE3jHABUuZdEXUUL3q+j579iweHh5s2rSJe+6557r2ka5vIURdKyotJ/5MTlXL+2QWJ87mX1LP28maYd2a8HgXf5xsLI0QqTAVDabr+2LZ2dkAuLq6XrFOcXExxcVVC87n5ubWeVxCiNubtaU5If4uhPi76MuyC0s5cCq7stWdRUxCBmeyi5i05hAz1h1lSKgfz4U3xc9V5iIXV1dvWtQVFRU8/PDDZGVlsWXLlivWmzBhAhMnTrykXFrUQghjKiotZ2Xcab7dcoIjaXkAmGmgVxsvnr+rGZ0CXK5xBNGQNMj7qIcPH86aNWvYsmXLVT/UxS3qU6dO0apVK0nUQgiToJRi89FzfPvXCf46ek5f3tHfmRfubkbP1l5y69dtoMF1fY8cOZJVq1axefPma34gKysrrKyqRlrm5ORcpbYQQtxaGo2Ge+9w59473DmUmsN3fyXwS9xpdidnMWLBbvxcbXi2W1OGhPphb1Uv/kSLOmbSLWqlFK+++irLly9n48aNBAYG1vgYMphMCGHq0nOL+GFbEvO3J+kXD3GwtuDJLv4MC2+Ct5ONkSMUta3BdH2PGDGChQsX8ssvvxjcO+3k5ISNzfX94EqiFkLUF4Ul5SzdfZL/bUngxDndqHELMw1923nz4t3NaNPYycgRitrSYBL1lWb5mTt3LsOGDbuuY0iiFkLUNxUVivWH0vl2ywm2n6iaVOXOZq68cFczHgjykOU567kGc43ahL9DCCFEnTEz0xDRypOIVp7sP5nNd1tOsGrfGbafyGD7iQyaNbLjubua8mhHX2y0smBIQ2fSLeraIC1qIURDcDqrkO//TmRhTDK5RbrlOV1sLXnqzgCeDgvAw8HayBGKmmgwXd+1QRK1EKIhySsu46edKfxvawIpGYUAaDTQtJEdbXycaNvYiTaNnWjd2BFHa5n9zFQ1mK5vIYQQhuytLHg2vCnPhDXhz39S+eavE+xO1k1ZeuJsPiv3ntbXbeJmS5vGVcm7jY8TTraSvOsbSdRCCFEPmZtp6N3Wm95tvTmXV8yBU9mVjxz2n8rmVFYhiecLSDxfwKp9Z/T7+bva0rayxd22Mnm72GmN+EnEtUiiFkKIeq6RvZXB8pwAGfkl/HM6m/2VCXz/qWxSMgpJziggOaOA3/ZXJe/Gzja0bexEW98LLW9HWaLThEiiFkKIBsjVTsvdge7cHeiuL8suKOVAteR94FQ2iecLOJVVyKmsQn7/J1Vf18fJmjaNnWjn60SIvwvtfJ1wkGveRiGJWgghbhNOtpaEt2hEeItG+rLswlL+OZ3NP5Vd5gdOZXPiXD6ns4s4nV3EnwfTAN2AtUAPezr4ORPi70IHP2fu8HSQeclvAUnUQghxG3OysaRb80Z0a16VvHOLSjl4Wpe4957MZk9yJiczCzmSlseRtDyW7DwJgK3WnHa+TnTwcyHE35kQP2c8
HOU2sdomiVoIIYQBB2tLujZzo2szN33Z2dxi4lKyiEvJZE9yFvtOZpNXXKafhOWCxs42la1uZzr4OdOmsRPWljIpy82QRC2EEOKa3B2seLCVJw+28gSgvEJxLD2PuJRM4lKy2JOcxZG0XP317guD1SzMNAR7Oxok76aN7K44RbS4lCRqIYQQNWZupqGllwMtvRx4LNQf0E3Gsu9klj5xx6VkcTa3mP2Vo85/2J4E6LrbO/g509HfhY4BuuQtA9WuTBK1EEKIWmFvZWFwvVspxamsQoPEvf9UNtmFpWw6cpZNR84CuoFqLT0dCPF3oaO/M50CXKTVXY0kaiGEEHVCo9Hg62KLr4stD7XzAaCkrIJDqTnsSc5id3Imu5MzScko5FBqLodSc1kUkwzo5jEP8XehU4BuoFp7X2fsrG7PlHV7fmohhBBGobUwo52vM+18nRnarQkA6blF7E6qTNxJmew7lU1mQSnrD6Wz/lA6oOtqD/Jy0HeXd/J3xc/V5rZodUuiFkIIYVQeDtb0auNFrzZegK7V/c/pbHYnVyXvM9lF/HM6h39O5+ivdTey1+pb3R0rJ2VpiCPMJVELIYQwKVoLM0L8XQjxd+F5mgJwJruQ3UlZ7ErSdZf/czqbc3klRB9MI7pyUhYLMw2tfRxpXzkZi+5hj7Nt/Z7LXBK1EEIIk+ftZEPfdjb0becNQFFpOQdOZbM7ObMyeetGmO89qZukpToPByuDxB1Y+W99GWkuiVoIIUS9Y21pTucmrnRu4groRpifzCysbG3ncCQtlyOpuZzOLiI9t5j03GK2HDtncAwfJ2t90g70dKClpwMtPOxNbtCaaUUjhBBC3ACNRoOfqy1+rrb079BYX55bVMrR9DyOpuVyODWPo+m5HEnLJS2nWD+f+YXbxC7wdbExaIHfUZnAjXX9WxK1EEKIBsvB2lI3UtzfxaA8u6CUI5VJ+2hanq4FnpbLubwSTmYWcjKzUD/iHHT3ege42hLi78J/HutwSz+DJGohhBC3HSdbS0KbuBJa2XV+QUZ+SWXyzuVwWi5H0nSt8cyCUhLPFxhlYJokaiGEEKKSq52WO5u5cWe1BUmUUpzLK+FoWi4V6tbHJIlaCCGEuAqNRoO7gxXuDlZGeX8zo7yrEEIIIa6LJGohhBDChEmiFkIIIUyYJGohhBDChEmiFkIIIUxYgx/1XVFRAcCZM2eMHIkQQgihcyEnXchRV9PgE3Vamm5VlS5duhg5EiGEEMJQWloa/v7+V62jUUoZ4fbtW6esrIw9e/bg6emJmdnN9fTn5ubSqlUrDh48iIODQy1F2LDJOas5OWc1J+es5uSc1VxtnrOKigrS0tIICQnBwuLqbeYGn6hrU05ODk5OTmRnZ+Po6GjscOoFOWc1J+es5uSc1Zycs5oz1jmTwWRCCCGECZNELYQQQpgwSdQ1YGVlxQcffICVlXHme62P5JzVnJyzmpNzVnNyzmrOWOdMrlELIYQQJkxa1EIIIYQJk0QthBBCmDBJ1EIIIYQJk0RdA7NmzaJJkyZYW1vTtWtXYmJijB2SyZo0aRKhoaE4ODjg4eHBgAEDOHz4sLHDqjc+++wzNBoNY8aMMXYoJu3UqVM89dRTuLm5YWNjQ9u2bdm5c6exwzJZ5eXlvP/++zRt2hQbGxuaN2/ORx99hAxVMrR582b69euHj48PGo2GFStWGGxXSjF+/Hi8vb2xsbEhIiKCo0eP1lk8kqiv048//sjYsWP54IMP2L17N+3bt6dnz56kp6cbOzSTtGnTJqKioti+fTvR0dGUlpbSo0cP8vPzjR2ayYuNjeXrr7+mXbt2xg7FpGVmZhIeHo6lpSVr1qzh4MGD/Pvf/8bFxcXYoZmsyZMnM3v2bL788kvi4+OZPHkyU6ZMYebMmcYOzaTk5+fTvn17Zs2addntU6ZMYcaMGXz11Vfs2LEDOzs7evbsSVFRUd0EpMR16dKli4qKitK/Li8vVz4+PmrSpElGjKr+
SE9PV4DatGmTsUMxabm5uSowMFBFR0ere++9V40ePdrYIZmst956S911113GDqNe6du3r3ruuecMyh555BEVGRlppIhMH6CWL1+uf11RUaG8vLzU559/ri/LyspSVlZWatGiRXUSg7Sor0NJSQm7du0iIiJCX2ZmZkZERATbtm0zYmT1R3Z2NgCurq5GjsS0RUVF0bdvX4OfNXF5K1eupHPnzgwePBgPDw9CQkL45ptvjB2WSevWrRvr1q3jyJEjAOzdu5ctW7bQu3dvI0dWfyQkJJCammrwO+rk5ETXrl3rLB80+NWzasO5c+coLy/H09PToNzT05NDhw4ZKar6o6KigjFjxhAeHk6bNm2MHY7JWrx4Mbt37yY2NtbYodQLJ06cYPbs2YwdO5Z33nmH2NhYRo0ahVarZejQocYOzyS9/fbb5OTkEBQUhLm5OeXl5XzyySdERkYaO7R6IzU1FeCy+eDCttomiVrUuaioKA4cOMCWLVuMHYrJSklJYfTo0URHR2NtbW3scOqFiooKOnfuzKeffgpASEgIBw4c4KuvvpJEfQVLlixhwYIFLFy4kNatWxMXF8eYMWPw8fGRc2bCpOv7OjRq1Ahzc3P92tYXpKWl4eXlZaSo6oeRI0eyatUqNmzYgK+vr7HDMVm7du0iPT2djh07YmFhgYWFBZs2bWLGjBlYWFhQXl5u7BBNjre3N61atTIoCw4OJjk52UgRmb433niDt99+m8cff5y2bdvy9NNP89prrzFp0iRjh1ZvXPibfyvzgSTq66DVaunUqRPr1q3Tl1VUVLBu3TrCwsKMGJnpUkoxcuRIli9fzvr162natKmxQzJp3bt3Z//+/cTFxekfnTt3JjIykri4OMzNzY0doskJDw+/5Ja/I0eOEBAQYKSITF9BQQFmZoZ/9s3NzamoqDBSRPVP06ZN8fLyMsgHOTk57Nixo87ygXR9X6exY8cydOhQOnfuTJcuXZg+fTr5+fk8++yzxg7NJEVFRbFw4UJ++eUXHBwc9NdunJycsLGxMXJ0psfBweGS6/d2dna4ubnJdf0reO211+jWrRuffvopQ4YMISYmhjlz5jBnzhxjh2ay+vXrxyeffIK/vz+tW7dmz549TJs2jeeee87YoZmUvLw8jh07pn+dkJBAXFwcrq6u+Pv7M2bMGD7++GMCAwNp2rQp77//Pj4+PgwYMKBuAqqTseQN1MyZM5W/v7/SarWqS5cuavv27cYOyWQBl33MnTvX2KHVG3J71rX9+uuvqk2bNsrKykoFBQWpOXPmGDskk5aTk6NGjx6t/P39lbW1tWrWrJl69913VXFxsbFDMykbNmy47N+voUOHKqV0t2i9//77ytPTU1lZWanu3burw4cP11k8snqWEEIIYcLkGrUQQghhwiRRCyGEECZMErUQQghhwiRRCyGEECZMErUQQghhwiRRCyGEECZMErUQQghhwiRRCyGEECZMErUQotZpNBpWrFhh7DCEaBAkUQvRwAwbNgyNRnPJo1evXsYOTQhxA2RRDiEaoF69ejF37lyDMisrKyNFI4S4GdKiFqIBsrKywsvLy+Dh4uIC6LqlZ8+eTe/evbGxsaFZs2b8/PPPBvvv37+fBx54ABsbG9zc3HjppZfIy8szqPO///2P1q1bY2Vlhbe3NyNHjjTYfu7cOQYOHIitrS2BgYGsXLlSvy0zM5PIyEjc3d2xsbEhMDDwki8WQggdSdRC3Ibef/99Hn30Ufbu3UtkZCSPP/448fHxAOTn59OzZ09cXFyIjY3lp59+Yu3atQaJePbs2URFRfHSSy+xf/9+Vq5cSYsWLQzeY+LEiQwZMoR9+/bRp08fIiMjycjI0L//wYMHWbNmDfHx8cyePZtGjRrduhMgRH1SZ+tyCSGMYujQocrc3FzZ2dkZPD755BOllG4J0ldeecVgn65du6rhw4crpZSaM2eOcnFxUXl5efrtv/32mzIzM1OpqalKKaV8fHzUu+++
e8UYAPXee+/pX+fl5SlArVmzRimlVL9+/dSzzz5bOx9YiAZOrlEL0QDdf//9zJ4926DM1dVV/zwsLMxgW1hYGHFxcQDEx8fTvn177Ozs9NvDw8OpqKjg8OHDaDQaTp8+Tffu3a8aQ7t27fTP7ezscHR0JD09HYDhw4fz6KOPsnv3bnr06MGAAQPo1q3bDX1WIRo6SdRCNEB2dnaXdEXXFhsbm+uqZ2lpafBao9FQUVEBQO/evUlKSmL16tVER0fTvXt3oqKimDp1aq3HK0R9J9eohbgNbd++/ZLXwcHBAAQHB7N3717y8/P127du3YqZmRktW7bEwcGBJk2asG7dupuKwd3dnaFDhzJ//nymT5/OnDlzbup4QjRU0qIWogEqLi4mNTXVoMzCwkI/YOunn36ic+fO3HXXXSxYsICYmBi+++47ACIjI/nggw8YOnQoEyZM4OzZs7z66qs8/fTTeHp6AjBhwgReeeUVPDw86N27N7m5uWzdupVXX331uuIbP348nTp1onXr1hQXF7Nq1Sr9FwUhhCFJ1EI0QL///jve3t4GZS1btuTQoUOAbkT24sWLGTFiBN7e3ixatIhWrVoBYGtryx9//MHo0aMJDQ3F1taWRx99lGnTpumPNXToUIqKivjPf/7D66+/TqNGjRg0aNB1x6fVahk3bhyJiYnY2Nhw9913s3jx4lr45EI0PBqllDJ2EEKIW0ej0bB8+XIGDBhg7FCEENdBrlELIYQQJkwStRBCCGHC5Bq1ELcZudolRP0iLWohhBDChEmiFkIIIUyYJGohhBDChEmiFkIIIUyYJGohhBDChEmiFkIIIUyYJGohhBDChEmiFkIIIUyYJGohhBDChP0/VKSd/yGQvZwAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 500x300 with 2 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "from matplotlib.ticker import MaxNLocator\n",
    "\n",
    "def plot_losses(epochs_seen, tokens_seen, train_losses, val_losses):\n",
    "    \"\"\"Plot training and validation loss against epochs (bottom x-axis)\n",
    "    and total tokens seen (top x-axis) on a single figure.\"\"\"\n",
    "    fig, ax1 = plt.subplots(figsize=(5,3))\n",
    "\n",
    "    ax1.plot(epochs_seen, train_losses, label=\"training loss\")\n",
    "    ax1.plot(epochs_seen, val_losses, linestyle=\"-.\", label=\"val loss\")\n",
    "    ax1.set_xlabel(\"Epochs\")\n",
    "    ax1.set_ylabel(\"Loss\")\n",
    "    ax1.legend(loc=\"upper right\")\n",
    "    # Restrict epoch ticks to whole numbers\n",
    "    ax1.xaxis.set_major_locator(MaxNLocator(integer=True))\n",
    "\n",
    "    # Second x-axis on top; the alpha=0 plot is invisible and exists only\n",
    "    # to make this axis span the tokens-seen range of the same data\n",
    "    ax2 = ax1.twiny()\n",
    "    ax2.plot(tokens_seen, train_losses, alpha=0)\n",
    "    ax2.set_xlabel(\"Tokens seen\")\n",
    "\n",
    "    fig.tight_layout()\n",
    "    plt.show()\n",
    "\n",
    "# Map each recorded loss value to a (possibly fractional) epoch position\n",
    "epochs_tensor = torch.linspace(0, num_epochs, len(train_losses))\n",
    "plot_losses(epochs_tensor, tokens_seen, train_losses, val_losses)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "output text:\n",
      " Every effort moves you?\"\n",
      "\n",
      "\"Yes--quite insensible to the irony. She wanted him vindicated--and by me!\"\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Run inference on CPU; eval() disables dropout for deterministic output\n",
    "model.to(\"cpu\")\n",
    "model.eval()\n",
    "\n",
    "tokenizer = tiktoken.get_encoding(\"gpt2\")\n",
    "\n",
    "# Generate 25 new tokens continuing the prompt, bounded by the model's\n",
    "# context window (generate_text_simple is defined earlier in the notebook)\n",
    "token_ids = generate_text_simple(\n",
    "    model=model,\n",
    "    idx=text_to_token_ids(\"Every effort moves you\", tokenizer),\n",
    "    max_new_tokens=25,\n",
    "    context_size=GPT_CONFIG_124M['context_length']\n",
    ")\n",
    "\n",
    "print(\"output text:\\n\", token_ids_to_text(token_ids, tokenizer))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "forward\n"
     ]
    }
   ],
   "source": [
    "# Toy 9-word vocabulary used to illustrate decoding strategies\n",
    "vocab = {\"closer\":0,\"every\":1,\"effort\":2,\"forward\":3,\"inches\":4,\"moves\":5,\"pizza\":6,\"toward\":7,\"you\":8}\n",
    "\n",
    "# id -> word lookup for printing sampled tokens\n",
    "inverse_vocab = {v:k for k,v in vocab.items()}\n",
    "\n",
    "# Example next-token logits, one per vocabulary entry\n",
    "next_token_logits = torch.tensor([4.51, 0.89, -1.90, 6.75, 1.63, -1.62, -1.89, 6.28, 1.79])\n",
    "\n",
    "# Greedy decoding: always pick the highest-probability token\n",
    "probas = torch.softmax(next_token_logits,dim=0)\n",
    "next_token_id = torch.argmax(probas).item()\n",
    "print(inverse_vocab[next_token_id])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "toward\n"
     ]
    }
   ],
   "source": [
    "# Probabilistic decoding: draw the next token from the full softmax\n",
    "# distribution instead of taking the argmax (seeded for reproducibility)\n",
    "torch.manual_seed(123)\n",
    "next_token_id = torch.multinomial(probas, num_samples=1).item()\n",
    "print(inverse_vocab[next_token_id])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "71 x closer\n",
      "2 x every\n",
      "0 x effort\n",
      "544 x forward\n",
      "2 x inches\n",
      "1 x moves\n",
      "0 x pizza\n",
      "376 x toward\n",
      "4 x you\n"
     ]
    }
   ],
   "source": [
    "def print_sampled_tokens(probas):\n",
    "    \"\"\"Sample 1000 token ids from the distribution `probas` and print\n",
    "    how often each vocabulary entry was drawn.\"\"\"\n",
    "    torch.manual_seed(123)  # fixed seed so the frequencies are reproducible\n",
    "    sample = [torch.multinomial(probas, num_samples=1).item() for _ in range(1000)]\n",
    "    # minlength guarantees one count per vocabulary entry; a plain bincount\n",
    "    # would truncate at the highest id actually sampled, silently dropping\n",
    "    # never-sampled tokens at the end of the vocabulary\n",
    "    sample_ids = torch.bincount(torch.tensor(sample), minlength=len(probas))\n",
    "    for i, freq in enumerate(sample_ids):\n",
    "        print(f\"{freq} x {inverse_vocab[i]}\")\n",
    "\n",
    "print_sampled_tokens(probas)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "def softmax_with_temperature(logits, temperature):\n",
    "    \"\"\"Return the softmax of `logits` after dividing them by `temperature`.\n",
    "\n",
    "    Temperatures below 1 sharpen the distribution (more greedy);\n",
    "    temperatures above 1 flatten it (more diverse sampling).\"\"\"\n",
    "    return torch.softmax(logits / temperature, dim=0)\n",
    "\n",
    "# Original distribution, a sharper one, and a flatter one\n",
    "temperatures = [1, 0.1, 5]\n",
    "\n",
    "scaled_probas = [softmax_with_temperature(next_token_logits, T) for T in temperatures]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Grouped bar chart comparing the temperature-scaled distributions\n",
    "x = torch.arange(len(vocab))\n",
    "\n",
    "bar_width = 0.15\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(5,3))\n",
    "for i, T in enumerate(temperatures):\n",
    "    # offset each temperature's bars so the groups sit side by side\n",
    "    ax.bar(x + i*bar_width, scaled_probas[i], bar_width, label=f\"Temperature = {T}\")\n",
    "\n",
    "ax.set_ylabel(\"Probability\")\n",
    "# label each bar group with its vocabulary word\n",
    "ax.set_xticks(x)\n",
    "ax.set_xticklabels(vocab.keys(), rotation=90)\n",
    "# the per-series labels above are only shown once a legend is drawn\n",
    "ax.legend()\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
